diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -483,6 +483,11 @@ option(LLVM_ENABLE_CRASH_DUMPS "Turn on memory dumps on crashes. Currently only implemented on Windows." OFF) +option(LLVM_ENABLE_MDL "Enable use of MDL infrastructure" OFF) +if (LLVM_ENABLE_MDL) + set(ENABLE_MDL_USE 1) +endif() + set(WINDOWS_PREFER_FORWARD_SLASH_DEFAULT OFF) if (MINGW) # Cygwin doesn't identify itself as Windows, and thus gets path::Style::posix @@ -1157,6 +1162,10 @@ add_subdirectory(lib) if( LLVM_INCLUDE_UTILS ) + if (LLVM_ENABLE_MDL) + add_subdirectory(utils/TdScan) + add_subdirectory(utils/MdlCompiler) + endif() add_subdirectory(utils/FileCheck) add_subdirectory(utils/PerfectShuffle) add_subdirectory(utils/count) diff --git a/llvm/docs/Mdl/BundlePacking.md b/llvm/docs/Mdl/BundlePacking.md new file mode 100644 --- /dev/null +++ b/llvm/docs/Mdl/BundlePacking.md @@ -0,0 +1,252 @@ + + +## MDL-Based Bundle Packing and Pool Allocation + +Reid Tatge tatge@google.com + + +[TOC] + + + +### Introduction + +The MDL language enables the compiler writer to describe the target machine microarchitecture in a first-class language. From that description, we create a database of information with which we can implement low-level components of a compiler backend, including latency management, pipeline modeling, and bundle packing. Normally these passes require carefully written target-specific code. With the MDL infrastructure, we can write efficient, fast and target-independent implementations of these components. This document discusses some of the design tradeoffs that we are trying to address in: + + + +* **Bundle packing:** For VLIW architectures, we need to determine which instructions can be issued in parallel. +* **Resource pool allocation:** VLIW processors commonly have instructions which may reserve some number of resources from a common, possibly shared, resource pool. + + +### Bundle Packing + +A VLIW instruction scheduler needs to know whether a particular set of instructions can be issued in parallel. We refer to this as “bundle packing”. + +Typically a scheduler maintains a list of instructions which are “ready” to be scheduled in the context of an execution cycle, ie a “ready list”. The candidate instructions typically have different “priorities” - it may be more urgent to schedule some instructions at a particular time, such as those on a critical path, vs other instructions. This is a key part of the scheduling algorithm, and largely machine independent. However the scheduler needs a way of determining if a particular set of instructions can be \*issued\* in parallel, and this requires significant machine knowledge. In the low-level bundling infrastructure, we don’t want to address the priority problem - that is a higher-level concern that is the domain of the scheduler. Instead the bundler is focused primarily on resource management. + +In addition to knowing whether a set of instructions can be bundled, we need to know whether the bundle can be issued in the context of the current partially generated schedule. We need to comprehend any structural pipeline hazards and/or reserved resources that occur in nearby cycles. This implies that we need a way for the scheduler to maintain a temporal reservation table of resources and hazards while scheduling a section of code. (In the absence of structural hazards, we want to be able to skip or minimize this step.) 
+ +Additionally, for loop scheduling, we also want a way of defining and using a cyclical or modulo resource reservation table. + +So we need to provide a few services to the scheduler: + + + +* A resource reservation table that models resources and hazards over some portion of a schedule. This could be null if the architecture doesn’t have structural hazards. +* An object that represents a bundle which includes a list of instructions and the associated functional units and resources it consumes (over time). +* A method for determining whether an instruction can be added to a bundle. + +This approach allows a client scheduler to manage the priority of instructions and the order in which they are added to the schedule. A cycle-by-cycle list scheduler can incrementally build up the instructions for a particular cycle, or for a range of cycles; a loop scheduler can incrementally build up the schedule for an entire loop. + + +#### Resource management + +The primary task of the bundle packer is to assign subunits to candidate instructions such that the resources used by each instruction don’t conflict with the resources used by any other instruction in the bundle, or any resources reserved by other bundles over the execution lifetime of this bundle. Recall that resources are used to model: + + + +* Functional unit assignment +* Issue slot assignment +* Arbitrary encoding bits +* Pipeline hazards in general +* Pools of resources + +Each subunit candidate (for each instruction) models a particular behavior of an instruction - on a specific functional unit, in a specific issue slot, using specific resources. A subunit precisely describes which set of resources are used by the instruction, and in what pipeline phase. + +To illustrate this, if we have an instruction ADD that can run on either functional unit A1 or A2, and it can be issued in one of three issue slots (I1, I2, and I3), it will have at least 6 candidate subunits to which it can be assigned: A1/I1, A1/I2, A1/I3, A2/I1, A2/I2, A2/I3. + +Identical functional unit candidates and identical issue slots are essentially implicitly defined resource pools for which we enumerate all possible permutations in distinct subunits. Consequently, they don’t have to be explicitly assigned as with user-defined resource pools. + +Note that the MDL compiler can in fact treat all resource pools similarly to functional unit and issue slot resources, and it does so, conditionally, for _small_ pools. However, when an instruction has several pool requests, or has requests for larger pools, it can lead to a combinatorial explosion of subunit candidates, and in general can be managed more efficiently with explicit pool allocation. + + +#### Interface to the bundle packer + +The primary interface to the bundle packer allows the client to incrementally add instructions to a bundle, given a current bundle and a current reservation table: + + +``` + bool AddToBundle(SlotSet &bundle, SlotDesc &candidate, Reservations &res); +``` + + + \ +A candidate is created with one of the the following constructors: + + +``` + SlotDesc(MCInst *instruction, MCSubtargetInfo *STI, MCInstrInfo *MCII); + SlotDesc(MachineInstr *instruction, TargetSubtargetInfo *STI); +``` + + +AddToBundle\*() attempts to add the candidate instruction to the input bundle, using the input Reservations object as an _initial _reservation table. If the instruction can be scheduled, it adds the instruction to the bundle, and returns true. 
Note that it does _not_ update the reservation table, this is currently done as a separate step when the bundle is “complete”: + + +``` + bool AddBundleToReservation(SlotSet &bundle, Reservations &res); +``` + + +This two-step approach allows a scheduler to use the current state of the reservation table as an input to the bundling process, but not update it until it is ready to commit to a particular fully formed bundle. It also allows the bundle packer itself to be stateless - the state is contained in the bundle itself, and the reservation table that it maintains. + +AddToBundle never ejects instructions from a bundle in order to allow the addition of a new instruction. This is based on the current assumption that instructions are passed to the bundle packer in priority order, so it would not make sense to eject a higher-priority instruction for a lower-priority instruction. + +This does not preclude us from designing an API to find an instruction in a bundle to eject to make it possible to schedule a new candidate. This is in fact trivial to write given the MDL infrastructure. + +This approach treats resource pool allocation as a separate problem. Once we determine that a set of instructions can be bundled at a particular cycle in a schedule, we can independently attempt to find a pool allocation for that bundle. + + +### Resource Pool Allocation + +A resource pool is a set of resources that can be shared between different instructions. Rather than statically reserving a particular resource, an instruction can “request” a resource from a shared pool of resources. Examples of this are a common feature of VLIW architectures, and include: + + + +* A pool of identical (or at least similar) functional units, +* A pool of issue slots for a cluster of functional units, +* Encoding bits for immediate operands, +* Register file ports (which may be limited to a maximum number of references per cycle.) + +This is a general allocation problem where a subunit asserts that an instruction needs _m_ of _n_ resources from a pool which it shares with other instructions running in parallel with it. The compiler must find an allocation which satisfies all the pool requests for a bundle of instructions. + +There are a few architectural aspects that complicate the allocation algorithm: + + + +1. Pool entries can be exclusively reserved or shared between instructions. An example of sharing is immediate encoding bits that can be used by instructions that use the same immediate. An exclusive resource example is opcode encoding bits, or a functional unit assignment. +2. There are three ways an instruction can reference a pool: + * A specific member of a pool (pool[2]), + * A subset of elements of a pool (pool[3..4]), + * A number of elements of a pool (pool:3) +3. An instruction instance may request zero, one or more members of a pool. (Clearly, a request for zero resources is trivial to accommodate.) +4. While pools are often associated with encoding bits and issue slots, they can be generalized to reserving resources over any range of pipeline phases. However, we explicitly require shared-value resources to be declared with a specific pipeline phase. +5. Instructions may have different pool requirements depending on which functional unit they are issued on. + +There are two ways a pooled resource can be allocated to an instruction: + + + +1. The subunit can state a specific member of a pool be allocated to that instruction. The resource is effectively “preallocated” to instructions assigned to that subunit. 
+2. A subunit can request a number of resources from a specific pool. + +Note: in cases where the MDL compiler chooses to decompose pools into distinct resources associated with different subunits (discussed above in resource management), there is no need to explicitly allocate those resources as a pool. Ideally, we could do that for all resource pools, in which case we can skip explicit pool allocation. In the current MDL compiler, this is always done for functional units and issue slots, and for very small pools (2-3 members) in some circumstances. + + +#### Compile-Time Complexity Considerations + +When the bundle packer finds a subunit allocation for a given set of instructions, it collects all the resource pool requests for all assigned subunits and attempts to allocate all of them. If it succeeds, the bundle is legal. + +In the current implementation, if the allocation fails, the current bundle fails - in particular, the most recently added instruction is rejected. This is based on several observations: + + + +* Resource pools are a relatively uncommon architectural feature, even on VLIW processors. This is particularly true if we ignore functional unit and issue slot pools. +* When pools do exist, requests for a pooled resource are typically tied to the instruction, rather than the subunit or functional unit they are assigned. Put differently, all the subunit candidates for a particular instruction will, in general, all have the same set of pool requests. + +For processors where this isn’t the case (easily checked in the MDL compiler), we would want to add code to make the bundle packer backtrack over the subunits when an allocation isn’t found. This is a significant reduction of complexity for the bundle packer. We don’t anticipate this being a problem for a real processor. + +One common scenario we do anticipate is resource pools where some instructions tie specific members of a pool to a particular functional unit, while other instructions may use any member of the pool. An example of this when a VLIW has shared register ports accessible by several functional units. The resources tied to a particular functional unit become “preallocated” resources, which are assigned by the initial bundle packer. The un-tied resources are then allocated in the pool allocator. If all the requests are of the same size (which is typical), we can always find allocations, assuming one is feasible. But in the case where preallocated resources are smaller than allocated resources, the bundle packer could create pre-allocations which would make it infeasible to find a legal allocation. (Example: a pool has 3 members (a, b, c). Instruction X can use “a”, “b”, or “c”, and instruction Y can use “ab” or “bc”. If “b” is preallocated to X, we cannot allocate Y’s request) Again, we don’t anticipate this being a problem in real processors. + +Given that significant simplification, allocation of all of the pool requests associated with a bundle is still a complex task, which has the following parameters: + + + +* We have a list of instructions each of which may contain pool requests for various pools. +* We have a reservation table which may indicate that some members of the pools may already be allocated to instructions (in the bundle). 
+* Each pool request includes: + * A pool identifier, + * The number of resources needed from the pool (0 through pool size), + * The set of suitable resources in the pool, + * Whether or not the resource can be shared with other requests (and a value to share), + * What pipeline phase its needed in. + +Generally we’d like to sort the pool requests so that we allocate the largest requests in the most constrained pools first. This allows a simple, greedy allocation algorithm to find a minimal allocation quickly with a single linear pass over the allocation request list. However, this is expensive, since we do this for each instruction as it is added to a bundle. + + +#### Allocation Algorithm + +There are a number of things we can do to reduce the O(n log n) behavior of sorting the list: + + + +1. Allocate a separate list of requests for each pool +2. Or: Allocate a separate list for each subpool, if any (and allocate the smallest first) +3. Or: Allocate separate lists for each allocation size, for each pool/subpool (and allocate the largest requests first) + +Organizing by pool id divides “n” by the number of unique pools. However, since we can easily identify all the subpools encountered in the machine description (at machine-description compile time), we can further simplify the sorting. By further separating out pool-size requests (also statically discoverable from the machine description), we can greedily allocate the largest requests first, and further minimize the sorting effort. Rather than sorting, we’re simply collating requests into lists of similar requests _that can be allocated in any order_. + +One complexity of this approach is that pool requests sizes are not always statically known at MDL build time, so a particular request may have to calculate the number of resources needed based on an operand’s value. This implies that any particular pool request can’t necessarily have its final pool id calculated by the MDL compiler. However, the MDL compiler can statically determine a maximum pool size request (worst case this is the entire pool, best case this is discoverable in the machine description). + +An few examples: + + + +1. if we have 4 pools, each with only single-resource requests, we would have 4 unique pool ids (1 through 4). +2. However, if pool 3 had requests for 1 or 2 resources, we could add a subpool for pool 3, and renumber the pool ids as 1, 2, 3, 4, and 5. +3. If each pool could have 1 or 2 entries allocated, we would introduce a subpool for each pool, and would produce the pool ids 1-8. + +In any case, the “final” pool id would be calculated as the “base” pool id, plus the overall pool size, minus a specific size request. (Note per example 2: there’s no reason to have each pool have the same number of size-based lists. In general, we would expect most pools would only have one allocation request size.) + +If we only have one pool, or only one allocation size for any pool/subpool, the algorithm is the same. In any case, the MDL compiler creates the pool ids for each subpool/request-size, and provides a trivial mapping function from an arbitrary pool request to the appropriate allocation set. + +Once requests are collated into the appropriate allocation set, the allocation algorithm is trivial: for each request in a set, step through the reservation table entries for the pool to find the first resource(s) that aren’t already reserved, and allocate them to the current instruction (mark them allocated). 
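
Here is a minimal C++ sketch of that greedy pass. The `PoolRequest` and `PoolReservation` types and names below are illustrative placeholders for this document only, not the actual MDL runtime API:

```
#include <optional>
#include <vector>

struct PoolRequest {
  int Count;                    // how many resources this request needs
  std::vector<int> Candidates;  // ids of acceptable pool members, in order
};

struct PoolReservation {
  std::vector<bool> Taken;      // one flag per pool member
};

// Walk the candidate resources and take the first Count that are still free.
// Returns the chosen ids, or std::nullopt if the request cannot be satisfied
// (in which case the most recently added instruction is rejected).
std::optional<std::vector<int>> Allocate(const PoolRequest &Req,
                                         PoolReservation &Res) {
  std::vector<int> Chosen;
  for (int Id : Req.Candidates) {
    if (Res.Taken[Id])
      continue;
    Chosen.push_back(Id);
    if (static_cast<int>(Chosen.size()) == Req.Count)
      break;
  }
  if (static_cast<int>(Chosen.size()) < Req.Count)
    return std::nullopt;
  for (int Id : Chosen)
    Res.Taken[Id] = true;       // mark the resources allocated
  return Chosen;
}
```

Because requests within a set can be allocated in any order, this single linear pass is sufficient; no backtracking over earlier allocations in the same set is needed.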
+ +This algorithm is orthogonal to whether or not the resources are shared. Each allocated pooled resource must have a value that represents the value needed by its current clients. Sharing allocated resources is a simple table lookup. + +There are a few refinements to this algorithm: + + + +1. For pools which never have preallocated members, there’s no reason to check the reservation table. +2. If all requests for a pool are the same size, it is be sufficient to simply add up the requests, and ensure they are fewer than the total available pool size (also see item 1). + +Note: there may be processors where resource sharing is non-trivial, since the equivalence check is non-obvious or non-trivial. We don’t have a great solution for this at this point. We can do “simple” checks for equivalence, and believe that may be sufficient. In the initial implementation of the allocator, we don’t expect to implement sharing, even though we have the information to do it. Its more time consuming, and we don’t believe its particularly important. + + +#### Sharing Resources + +Resources used for encoding are sometimes shared between instructions in a bundle. Two examples of this are register ports and immediate operands. If two instructions have operands that use identical encoding for the constant, the underlying architecture may allow them to share encoding bits, even if the decoded values are interpreted differently. + +Encoding of immediate operands is particularly important, since processors often don’t have many encoding bits to dedicate to immediate values. The values are shared and/or compressed. In general, in LLVM we don’t want to “expose” compression methods in the instruction description, since it complicates semantic analysis of instructions - so immediates are typically encode in the IR in their unadulterated form. + +In the general case, instructions may use more than one resource to encode an immediate, encoding different parts of the immediate in different resources. In this case, we want to be able to share the partial values with other instructions. + +An example where two instructions encode different immediate operands identically + + mov\_i16 8,r1 + + load\_scaled\_word 32(r2),r3 // instruction encodes the unscaled value (as 8) + +An example where one of a pair of resources could be shared: + + mov\_i32 0x12345678,r1 // uses 2 slots to encode 0x1234 and 0x5678 + + mov\_i16 0x1234,r2 // uses 1 slot to encode 0x1234 + + mov\_i16 0x5678,r3 // uses 1 slot to encode 0x5678 + +An example where the bits are interpreted differently: + + addf 1.0,r1 // adds 0x3F800000 (1.0) to r1 + + addi 1065353216,r2 // adds integer constant (0x3F80000) to r2 + + addi 16256,r3 // adds the high half of 0x3F800000 to a register + +The compiler needs to know how the bits of an immediate operand will be represented in resources (typically encoding bits) in order to share them. In the general case, this could be arbitrarily complex, but we want a reasonably simple mechanism that handles common cases. We do not need to check for legality of the immediate value - it is safely assumed that the construction of the instruction and operand performed the legalization. + +An additional complexity is that immediate operands can be overloaded so that different ranges of values may be encoded, and we only actually encode a subset of all the bits, but a different subset depending on the value. 
For example, we may have an operand type that can represent either of the two values 0x00001234 or 0x12340000 in a single resource, but only encoding the significant bits. While it might be better to define two different operands, that can lead to an explosion of instruction definitions. So we need a methodology that identifies both the type of encoding, and which bits to encode. + + \ +We need to introduce an operand attribute that characterizes the encoding type and decomposes an operand’s value (as represented in the compiler representation) into one or more constituent values that will be directly used to encode the representation of the value in the resource(s). Note that this doesn’t need to identify the exact encoding bits, only a canonical representation of those bits that can be compared for equality with different operand values. In general, we want to strip off prefixes and suffixes to identify the significant bits of an operand. + +The default case is that the immediate value is encoded exactly as-is in a single resource. For this case, we don’t need any additional information about the operand - we simply use the operand value directly. + +If the value is encoded using more than one resource, we need a way of decomposing the value to identify the parts that will be encoded in different resources. We may also want to have several different ways of encoding values - in one or more resources. + +A simple approach would be to have a syntax that identifies which bits of a value are prefixes (leading 1s or 0s), the value itself, and suffixes (typically trailing 0’s). + + + diff --git a/llvm/docs/Mdl/CmakeChanges.md b/llvm/docs/Mdl/CmakeChanges.md new file mode 100644 --- /dev/null +++ b/llvm/docs/Mdl/CmakeChanges.md @@ -0,0 +1,119 @@ + + +## CMake for MDL-LLVM integration + + +#### **Overview:** + +Modify LLVM’s CMake files to support the use of the MDL compiler in the building of clang. The use of the MDL is optional. It currently supports architectures that use SchedModels and/or Itineraries to implement instruction scheduling. The model we currently want to support is to automatically generate the MDL file from tablegen information, then use the MDL compiler output as part of the build. Later, we will want to support the hand-writing of MDL files, so at that point the process will change. + +Note that there are no new libraries introduced by this change. We’re adding a few new source files to CodeGen, and we’re generating code which is included by existing CodeGen and MC source files (much like TableGen generated files). + +Applies to architectures that do instruction scheduling: + + + +* AArch64, AMDGPU, ARM, Hexagon, Lanai, Mips, PowerPC, RISCV Sparc, SystemZ, X86 + +All other targets don’t define an architecture spec, so there’s no need to use MDL: + + + +* ARC, AVR, BPF, CSKY, LoongArch, M68k, MSP430, NVPTX, VE, WebAssembly, XCore + + +#### **CMake Configuration:** + +We’ve added a CMake variable that controls whether the MDL infrastructure is included in the LLVM build: LLVM\_ENABLE\_MDL. It is OFF by default. Setting it to ON will modify the build process to use the TdScan and MdlCompiler utilities to create and compile MDL descriptions of each target architecture, and include the generated code in the MC library. + + +#### **Required Tools:** + +Antlr4: + +You need the Antlr4 tool to build the MDL compiler. You can download the complete jar file at: https://www.antlr.org/download.html, at least version 4.10. Copy the file to /usr/local/lib. 
+ +Python: + +You need python to run Antlr: sudo apt-get install python. + + +#### **Details:** + +We’ve added two new utilities to llvm that work with TableGen output files to produce code which is used in Codegen and MC libraries. The two utilities are: + + + +* **utils/TdScan** - This scrapes architecture information from a target’s tablegen files and produces two “mdl” files that uses our MDL language to describe the architecture. +* **utils/MdlCompiler** - a compiler for the MDL language that reads a .mdl file and produces C source code that is #included by Target and MC libraries. + +To summarize the modification of the build process: + + + +1. We instruct TableGen to produce a “_$TARGET_.txt” file, using the –print\_records flag. +2. We use tdscan to read the generated “$TARGET.txt” and produce two files: + 1. _ “$TARGET_.mdl”: contains microarchitecture information + 2. “_$TARGET_\_instructions.mdl”: contains register, operand, and instruction information - used to sync the MDL description with the LLVM description. This file is imported into _$TARGET_.mdl. +3. We use mdl to compile the _$TARGET_.mdl” file into 3 “.inc” files which are included in Target and MC library source code (much like the output of tablegen.) + 3. _$TARGET_GenMdlInfo.inc + 4. _$TARGET_GenMdlTarget.inc + 5. _$TARGET_GenMdlInfo.h + + These should be generated and moved to the same directory as the tablegen output files. + + +We added five new C++ source files: + + + +1. llvm/include/llvm/CodeGen/MDLBundle.h +2. llvm/include/llvm/MDLHazardRecognizer.h +3. llvm/include/llvm/MC/MDLInstrInfo.h +4. llvm/include/llvm/MC/MDLInfo.h +5. llvm/lib/MC/MDLInstrInfo.cpp + +There are a few new dependencies for LLVM: + + + +1. The tdscan step is dependent on the target’s tablegen records.txt files (and on the tdscan utility) +2. The mdl compilation step is dependent on the file produced by tdscan (and on the mdl utility) +3. The Target and MC library sources are dependent on the three files generated by the MDL compiler: + 1. llvm/lib/Target/\*/\*Subtarget.cpp: + * #includes _$TARGET_GenMdlInfo.inc + * #includes _$TARGET_GenMdlTarget.inc + 2. llvm/lib/Target/\*/\*MCTargetDesc.cpp + * #includes _$TARGET_GenMdlInfo.\* +4. New header file dependencies: + 3. MDLInstrInfo.h: used by + * lib/CodeGen/TargetSchedule.cpp + * lib/MC/MCSchedule.cp + * include/llvm/CodeGen/MDLBundle.h + 4. MDLInfo.h: used by + * lib/CodeGen/TargetSchedule.cpp + * lib/CodeGen/TargetSubtargetInfo.cpp + * include/llvm/MC/MdlInstrInfo.h + * include/llvm/CodeGen/TargetSubtargetInfo.h + * include/LLVM/MC/MCSubtargetInfo.h + 5. MDLBundle.h: used by + * lib/CodeGen/MDLBundle.cpp + 6. MDLHazardRecognizer.h: used by + * llvm/include/llvm/CodeGen/DFAPacketizer.h + * llvm/lib/CodeGen/DFAPacketizer.cpp + * llvm/lib/CodeGen/TargetInstrInfo.cpp + + +#### Note on the use of ANTLR + +The MDL compiler uses Antlr4 to produce a parser and lexer for the MDL language. The runtime used by the generated parser uses RTTI, which is normally eschewed by LLVM. However, the MDL compiler itself, and the generated parser itself, do not use RTTI. + +However, to correctly link the MDL compiler, generated parser/lexer, and Antlr runtime, we need to compile the MDL compiler with -rtti enabled. And because of that, we need to compile a few items that we depend on from the llvmSupport library with RTTI as well: + + + +* CommandLine.cpp +* FormatVariadic.cpp +* Error.cpp + +We simply include these files directly in the MDL compiler CMake, and compile them with RTTI flags. 
It would be great if we could figure out a way to do this more cleanly. diff --git a/llvm/docs/Mdl/ForwardingNetworks.md b/llvm/docs/Mdl/ForwardingNetworks.md new file mode 100644 --- /dev/null +++ b/llvm/docs/Mdl/ForwardingNetworks.md @@ -0,0 +1,243 @@ + +## Modeling Forwarding Networks + +Reid Tatge tatge@google.com + + +[TOC] + + + +### Introduction + +To reduce the apparent latency between instructions that write and read register values, CPUs use “forwarding” networks: a set of connections between processor functional units which move data directly between the functional units, rather than routing them through a register file. This network reduces the latency between instructions running on connected units. + +Forwarding networks are usually not “complete”, where all functional units are connected to all other functional units. Instead, the network typically only connects units which are physically close to each other, and in particular each unit to itself. In practice, the network can be quite sparse. However, its worth noting that a fully connected, uniform network doesn’t have to be explicitly modeled at all. + + +### Background + +If we want to accurately model latencies between instructions, we need to comprehend forwarding networks. The need to accurately model latencies varies by the class of architecture we’re modeling: + + + +1. Dynamic-issue machines, +2. Statically scheduled machines with protected pipelines, +3. Statically scheduled machines with unprotected pipelines. + +We’d like to model each of these classes effectively. + +The primary issue common to all three architecture classes is the need to model the behavior of instructions that can issue on more than one functional unit. If the forwarding network is non-uniform, the effective latency can potentially be different on each candidate unit. Further, if we’re compiling for a dynamic-issue machine, we cannot statically determine which functional unit it will be issued on. On a statically scheduled machine, we cannot determine its “final” behavior until we’ve assigned a functional unit to the instruction instance. + +Here’s a more concrete example: we have two instructions _A_ and _B_. _A_ can run on functional units _A1_ and _A2_, while _B_ can run on _B1_ or _B2_. We have a forwarding path only between _A1_ and _B1._ + +On a dynamic-issue machine, we _cannot_ know which unit the instructions issue on, so we can’t, with certainty, determine the latency between _A_ and _B_, so we must use a heuristic, perhaps best-case, worst-case, or some other heuristic. + +On a statically scheduled machine, if the instructions haven’t been assigned functional units, we similarly cannot know for certain what the latency will be. Again we must use a heuristic, but for an unprotected pipeline we must be conservative. Another approach for this class of machine is to prune or restrict the functional unit candidates for the instructions so that we can make more accurate latency calculations. + + +#### Dynamic-Issue Machines + +This class of machine describes most modern general-purpose microprocessors: ARM, X86, PPC, Mips, etc. They are superscalar, multi-issue, out-of-order machines which dynamically issue and aggressively reorder the execution of instructions to avoid pipeline stalls. They typically have comprehensive forwarding networks, including full register renaming support. 
Further, several of these processors decompose the exposed ISA instructions into undocumented micro-ops which are separately issued into one of several parallel pipelines. + +For this class of architecture, precise modeling of the latency between pairs of instructions is at best a heuristic that is useful for motivating the order of instructions in memory, and grouping of instructions that can be issued in parallel. + +Note that this is not an excuse to provide an inadequate compiler model for these architectures, just an observation that it is generally impossible to provide a perfect model. We are largely just trying to help the processor do a better job. + + +#### Statically scheduled machines with protected pipelines + +In this machine class, instructions are issued and executed in-order, but the processor inserts stalls when the input values of an instruction are not available yet. This includes many VLIW’s and embedded microcontrollers (including Google TPUs). + +For these processors, the goal is to generate code which avoids predictable stalls, and therefore we need more accurate latency information than in a dynamic-issue machine. Non-uniform forwarding networks can complicate this, so the compiler may use heuristics or a functional-unit allocation mechanism to improve the accuracy of latency calculations. + + +#### Statically scheduled machines with unprotected pipelines + +This is a smaller class of machine where the processor has few or no hardware assists for managing latency, typically found in embedded accelerators (TI’s C6x VLIW processors, for example). + +For these processors, the compiler is fully responsible for ordering instructions such that all of an instruction’s inputs are available when needed by the instruction. Although ambiguous cases are less common for this class of architecture, in those cases the compiler must make conservative assumptions about required latencies. + + +### LLVM SchedModel Approach + +LLVM uses “Read Resources” and “Write Resources” to model each instruction’s register reads and writes. In general, resources associated with an instruction’s operands model when the reads or writes takes place in the execution pipeline. In the simple case, the latency between two dependent instructions can be calculated by using the difference between the write resource (in the writing instruction) and the read resource (in the reading instruction): + + Latency (Instrdef → Instruse) = WriteResourceLatencydef - ReadAdvanceLatencyuse + 1 + +The ReadAdvance latency provides an adjustment to the latency between a pair of instructions. There are generally two kinds of adjustments: + + + +1. A reduction in latency due to the presence of a forwarding network, which delivers input operands to a functional unit earlier than instruction pipeline would indicate. +2. A reduction in latency due to an instruction reading its input operand(s) later in the pipeline than normal. + +We refer to these as a forwarded-read and a late-operand-read, respectively. The primary difference between these reads is that a late-operand-read latency always applies to an instruction, but a forwarded-read latency is only applied if the def instruction has a particular set of write-resource ids - ie, the value is forwarded to the read instruction. + +When calculating latencies between two instructions, LLVM first calculates the latency for the writing instruction, and notes the instruction’s Write Resource id. 
It uses the Write Resource Id to lookup the read latency adjustment in the ReadAdvance table. The latency is then calculated as, effectively: + + Latency (Instdef → Instuse) = WriteResourceLatencydef - ReadAdvanceuse[WriteResourcedef] + +A ReadAdvance resource can optionally specify a set of write resources (ValidWrites) which indicate that the adjustment is associated with a forwarding network. An empty ValidWrites attribute indicates a late-operand-read. So tablegen can represent both adjustment types (described above), but it _cannot_ represent a combination of the two for a single instruction. + +This approach conflates the behavior of an instruction (when it reads its operands) with the latency adjustment of the forwarding network that delivers values earlier or later to an instruction. In the MDL approach, we want to decouple these behaviors. + +A few more observations about the tablegen approach to latency adjustment: + + + +1. Only three targets use SchedModel forwarding (AArch64, ARM, and PPC), and only three targets (ARM, PPC, and Hexagon) use Itinerary forwarding, +2. X86 _only _implements late-reads of operands (and has a few bugs in that logic), and no forwarding, +3. The SchedModel forwarding implementations are very sparse. It appears that its generally been used to “cherry-pick” a few “important” cases, and in the few CPUs that model forwarding, most instructions don’t have forwarding information associated with them, +4. In the few CPUs that model forwarding, the vast majority of instructions don’t have ReadAdvance entries associated with input operands. Consequently, forwarding cannot be modeled for these instructions. + +Or, more generally: + + + +1. Modeling forwarding networks in tablegen is tedious, so existing implementations are sparse, resulting in uneven support for forwarding across instructions. +2. Thats ok, since latency calculations are simply a non-critical heuristic for general-purpose processors. +3. Trying to replicate this in the MDL language - in an automatic way (tdscan) - is difficult. + + +### LLVM Itinerary Approach + +We don’t currently support Itinerary-based modeling. + +Forwarding using Itineraries is only used for ARM (A9 only), PPC (for three CPUs), and Hexagon. + + +### MDL Approach + +The MDL approach to forwarding networks is based on a several principles: + + + +* Each instruction (or instruction class) describes its own behavior, and may have different specified behaviors on different functional units. +* A forwarding network impacts the behavior of a functional unit, _not_ instructions that run on it. Therefore it is orthogonal to the specification of instruction behaviors. + +This is subtly different from the LLVM approach, which conflates forwarding with the behavior of the instruction, rather than a function of the datapath. 
+ +In effect, we want a latency calculation that explicitly separates the instruction behavior from the forwarding network behavior: + + Latency (Instdef → Instuse) = WritePhasedef - ReadPhaseuse + 1 - + + ForwardingCycles[FUdef, FUuse] + +In this model, there are three factors that affect the latency between a pair of instructions: + + + +* The pipeline phase in which the write occurs +* The pipeline phase in which the read occurs +* An adjustment made for the forwarding network between the two functional units + +In other words: ideally, we’d like to use instruction information (SchedWrites and ReadAdvance resources) to calculate the latency between two instructions, and then independently adjust this latency based on the presence of a forwarding path between the functional units that the two instructions execute on. + +From a language definition perspective, we use an approach that is tied to the description of a forwarding network as a feature of the processor datapath. Its not an attribute of instructions or even functional units, but rather a relationship between functional units. + +There are several types of situations that we want to handle cleanly: + + + +1. All functional units are forwarded to all other functional units, or no forwarding is implemented. These are essentially equivalent, and shouldn’t require any explicit modeling. +2. The forwarding network is _nearly _uniform, or extremely sparse. In either case, we’d like a minimum description of the network, either describing connections that exist, or describing which connections are missing. +3. Arbitrary networks - neither sparse nor uniform. +4. (Optional) Handle the common case where units are only forwarded to themselves. + +To describe a forwarding network, we need to describe each path in the network based on a CPU’s functional units. For example: + + +``` + forward FU1 → FU1, FU2, FU3; +``` + + +This asserts that results produced in FU1 are forwarded to units FU1 (itself, in this case), FU2, and FU3.The functional unit names could be functional unit template names, functional unit group names, or instance names - specifying a template or group name would include all functional units of that type (or group) in the CPU or cluster. To specify a particular instance of a functional unit, use its instance name instead: + + +``` + func_unit LOAD my_load1(); +func_unit LOAD my_load2(); + func_unit ADD my_add1(); + func_unit ADD my_add2(); + forward my_load1 → my_load1, my_add1; + forward my_load2 → my_load2, my_add2; + … +``` + + +This defines the network, and we’d like to be able to specify the latency adjustment for each edge in the network, which can be used, optionally, by instructions. Here’s how to do that: + + `forward my_load1 → my_load1(1), my_add1(2);` + +This is interpreted as _my\_load1_ saving 1 cycle when forwarding to itself, and saving 2 cycles when forwarding to _my\_add1_. Note that the adjustment could be negative numbers as well, indicating that the forwarding path is missing, resulting in longer latencies. + +Here’s a simple example of a forwarding network. Each “forward” statement specifies a functional unit, and a list of units it forwards to. 
+ + `cpu my_cpu {` + + +``` + func_unit MUL my_mul(); + func_unit ADD my_add(); + func_unit LOAD my_load(); + func_unit STORE my_store(); + func_unit BRANCH my_branch(); + + + forward MUL → MUL(1), ADD(2); + forward ADD → ADD(1), LOAD(3); + forward STORE → ADD(1), MUL(2); +} +``` + + +In general, we don’t expect instruction-specific forwarding behavior on a functional unit connected to the forwarding network, so for typical cases, we expect this description to generally be sufficient to describe most architectures. However, there can be exceptions. A relatively common case would be instruction operands that are _not_ connected to the forwarding network. We need a reasonable way to model these exceptions. + + +#### Representation of the network + +Each CPU has a specific number of functional units it implements. The basic representation is simply a 2-dimensional adjacency matrix, with the edges annotated with the latency adjustment. Although we expect the network to be sparse (or empty), we need this to be fast, so we’ll use a “dense” representation, since the sizes involved are rather small. A typical processor has fewer than a dozen functional units, so we can use a simple 2-dimensional array of signed chars to represent the graph, and positive or negative latency adjustments. + + +#### Extracting the forwarding network from SchedModels with TdScan + +Since SchedModel descriptions tend to conflate forwarding with instructions that read operands in later pipeline phases, we want to separate these two concerns when we generate an MDL description. Our fundamental assumption is that forwarding to a functional unit has the same behavior for all instructions that run on that functional unit: the value is delivered to the unit some number of cycles - typically 1 - earlier than if the forwarding path wasn’t implemented. + +So our first step is to extract the forwarding network using the ReadAdvance records associated with forwarding information (the ones that have ValidWrites). Using this information, we can generate a graph of forwarding latencies between every pair of instructions, and derive the overall forwarding graph. + +So the approach we use in TdScan is that we use the forwarding information to find the minimal forwarding path between every pair of units, then use that information to inform the forwarding benefit for all instructions that execute on the receiving unit. In very rare cases in current targets, the forwarding latency specified in tablegen is negative. Which we could interpret as an “early operand read” for the involved instructions. + +So given our desired formula for calculating latency: + + Latency (Instdef → Instuse) = WriteLatencydef - ReadLatencyuse + 1 - + + ForwardingCycles[FUdef, FUuse] + +We first calculate ForwardingCycles for each pair of functional units described in tablegen. When generating latency records for ReadAdvance resources which have ValidUnits, we decrement the ReadAdvance amount by the calculated ForwardingCycles for that forwarding path. This can be a little complicated, so lets start with a simple case. + +If we have an instruction ABC running on “UnitA” which has a ReadAdvance with a cycle count of 1, and has forwarding paths from “UnitB”, “UnitC”, and “UnitD”, each with a forwarding latency to “UnitA” of 1. In this case, we don’t need to generate the explicit “use” reference of 1, since it is subsumed by the forwarding adjustment. 
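
To make the simple case concrete, assume the defining instruction has a write latency of 3 cycles (an illustrative number, not taken from any target). Folding the 1-cycle read into the forwarding adjustment gives the same latency as emitting an explicit use reference:

    Explicit use reference:   3 - 1 + 1 - 0 = 3

    Forwarding adjustment:    3 - 0 + 1 - 1 = 3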
+ +For a more complicated case, say we have an instruction FMA running on “UnitA” which has a ReadAdvance with a cycle count of 4, and has forwarding paths from “UnitB” and “UnitC”, and we have the following forwarding graph: + + UnitB forwards to UnitA with cycles= <1, 4> + + UnitC forwards to UnitA with cycles= <2, 4> + +The <1, 4> notation implies that some instructions asserted that the forwarding latency between B and A was 1, and some instructions (such as FMA) asserted it was 4. Our first observation is that this doesn’t necessarily make sense - the forwarding path either exists or it doesn’t, and it shouldn’t be different for different instructions. This is (I believe) an artifact of the LLVM approach of conflating “late operand reads” with “forwarding cycles”. In this case, (for UnitB to UnitA), we split the 4 cycle latency between forwarding (1 cycle) and late operand reads (3 cycles), and generate a “use” record with a 3-cycle latency. + +Our second observation is that since UnitC has a different forwarding latency than UnitB, we don’t have a single convenient way of describing the latency of the FMA instruction wrt both forwarding paths. + +The last observation is that most instructions do not have any LLVM forwarding information associated with them. We also need to differentiate between ReadAdvances used for late-operand reads, and those used to represent forwarding. Differentiating late-operand-reads from forwarding reads requires some explicit way of annotating which is which in the generated description. There are three cases here (no forwarding info, late-operand-read, and forwarding read), and we need a way to communicate the three cases. + +In the case there is no forwarding information provided for an instruction, we need to _not_ model forwarding information for these instructions, assuming we want to match LLVM’s behavior. + +There’s an easy solution: instructions which don’t have input operand read resources also don’t use forwarding information. This is perhaps inaccurate, but we can match llvm’s behavior. + +There are a few approaches to differentiating between late-operand-reads and forwarding reads, none of them are ideal. If we want to match LLVM’s behavior exactly, we would also have to annotate references that _don’t _use forwarding, but _do _have explicit references. + +The current approach that we’ve implemented is to treat all ReadAdvances as late-operand-reads. This produces a “best-case” latency calculation, since we assume the operand is always read late. Note that a number of architectures (like X86, RISCV) don’t model _any_ forwarding, but only model late-read-operands. In other words, their heuristic is to assume a best-case latency. This isn’t an unreasonable approach, particularly if the target machine has a fully-connected, uniform forwarding network. + +Bottom line, we have forwarding support in the MDL language, but because of the incomplete and/or conflated operand reads in tablegen, its not easy to translate an LLVM description into MDL in a reasonable way. + diff --git a/llvm/docs/Mdl/ItinerariesAndStages.md b/llvm/docs/Mdl/ItinerariesAndStages.md new file mode 100644 --- /dev/null +++ b/llvm/docs/Mdl/ItinerariesAndStages.md @@ -0,0 +1,287 @@ + + +## Itineraries and Stages + +Reid Tatge tatge@google.com + + +[TOC] + + + +#### Introduction + +Tablegen “Itineraries” provide a way to describe how each instruction uses resources in its pipelined execution. 
Each instruction itinerary object (InstrItinData) can have a set of “InstrStages” associated with it. Each stage specifies: + + + +* A set of alternate resources, one of which can be reserved in that cycle +* The number of cycles they are reserved +* The number of elapsed cycles to the next stage (which could be 0) + +A trivial example: the following tuple describes the use of a single resource in a single stage: + + +``` + InstrStage: cycles=1, func_units=[RES1], timeinc=1 +``` + + +This indicates that the resource RES1 is reserved for 1 cycle, and the next stage starts 1 cycle later. If multiple resources are used in a single cycle, you can use more than one stage to specify that conjunction: + + +``` + InstrStage: cycles=1, func_units=[RES1], timeinc=0 + InstrStage: cycles=1, func_units=[RES2], timeinc=1 +``` + + +This could be written as (RES1 & RES2), since both resources are reserved for 1 cycle. + +You can also express alternative resources in a stage (a disjunction of resources): + + +``` + InstrStage: cycles=1, func_units=[RES1, RES2, RES3], timeinc=1 +``` + + +This could be written as (RES1 | RES2 | RES3). You can also specify conjunctions of disjunctions: + + +``` +InstrStage: cycles=1, func_units=[RES1, RES2], timeinc=0 + InstrStage: cycles=1, func_units=[RES3, RES4], timeinc=1 +``` + + +This indicates that one of [RES1, RES2] AND one of [RES3, RES4] are allocated in the same execution cycle. We write this as (RES1 | RES2) & (RES3 | RES4). + +In tablegen, disjunctions of conjunctions are implemented via ComboFuncUnit objects - a resource that includes 2 or more resources. Consider this example, where COM1 and COM2 are ComboFuncUnits. + + +``` +InstrStage: cycles=1, func_units=[COM1, COM2], timeinc=0 +``` + + +If COM1 is [RES1, RES2] and COM2 is [RES3, RES4], this implements: + + (RES1 | RES2) & (RES3 | RES4) + +In summary, a single InstrStage can represent disjunction of resources, conjunctions of resources, and disjunctions of conjunctions: + + (A | B | C) (A & B & C) ((A & B) | (C & D)) + +Using sets of InstrStages, you can implement conjunctions of any of those. + +In managing resources, the MDL language directly supports conjunctions and disjunctions of resources. + + (A | B | C) (A & B & C) + +The conjunction of these is accomplished by simply using more than one resource specification in a template instance. For ((A | B) & (C | D)), you’d write: + + ` func_unit Adder my_adder1(A | B, C | D);` + +The disjunction of these is accomplished by using several templates: (A & B) | C & D) + + ` func_unit Adder my_adder1(A & B);` + + ` func_unit Adder my_adder2(C & D);` + + + +Note that conjunctions of disjunctions can trivially be rewritten as their cross-product, or a disjunction of conjunctions: + +(A | B) & (C | D) —> (A & C) | (A & D) | (B & C) | (B & D) + +Issue slots can only have a single resource-specification, so we use this technique when complex conjunctions are used as issue slot specs. + + +#### Issue Slot Resource Entries + +In the MDL, we like to separate issue resources (like issue slots) from other resources, such as functional units. There isn’t a first-class difference between these in InstrStages in TableGen. However, targets like Hexagon use a naming convention which we can key off of to identify issue resources (ie “SLOTS”.) + +Consequently, we want to identify InstrStages that reference issue slot resources and treat them as separate from functional unit resources. 
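
For example, using Hexagon-style resource names (illustrative only), a stage such as `InstrStage: cycles=1, func_units=[SLOT0, SLOT1], timeinc=0` is treated as an issue-slot specification, while a stage such as `InstrStage: cycles=1, func_units=[CVI_XLANE], timeinc=1` remains an ordinary functional-unit resource reference.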
+ + +#### MDL Resource Management + +In the machine description language, a functional unit “instance” is the primary vehicle for specifying which resources are used by instructions running on that unit. It has the following general form: + + +``` + func_unit () → +``` + + +The resource list is a comma separated list of resource specifications. Each resource specification can be a single resource reference (with several forms), a named resource group, a conjunction of individual resource names, or a disjunction of individual resource names. Here’s an example which passes four resource specifications to a functional unit instance: + + +``` + func_unit Adder my_adder(foo, bar, foo | bar, foo & bar); +``` + + +Note that _how and when_ those resources are used is not determined by the functional unit instance, but in the subunit and/or latency templates associated with the functional unit. Also note that since all of the resources are passed to the functional unit, each functional unit can make conjunctive or disjunctive use of each of them. Multiple instances of the same functional unit with different resource specifications represent disjunctive uses of the resource combinations. For example: + + +``` + func_unit Adder my_adder1(a | b | c, x & y & z); + func_unit Adder my_adder2(a & b, x | y); +``` + + +Specify that instructions that run on an Adder functional unit have two different, independent resource usage patterns. + +Similarly, the slot specification can be a resource, or a conjunction or disjunction of separate resources: + + +``` + func_unit Adder adder0(foo, bar, foo | bar, foo & bar) → slot0; + func_unit Adder adder1(foo, bar, foo | bar, foo & bar) → slot2 | slot3; + func_unit Adder adder2(foo, bar, foo | bar, foo & bar) → slot0 & slot1 & slot3; +``` + + +In this example, there are three alternative sets of issue slots an “Adder” instruction can use. + +The reservation of these resources is done in latency rules, where you explicitly reference each resource. For the above example, a reasonable latency rule might look like: + + +``` + latency adder(resource a, b, c, d) { + use(E1, a, b); + use(E2:3, c); + use(E5, d) +} +``` + + +This models “a” and “b” (or in this case “foo” and “bar”) as reserved in cycle E1 for 1 cycle, and “c” (“foo” or “bar”) as used in cycle E2 for 3 cycles, and “d” (“foo” and “bar”) as reserved in cycle E5. + + +##### Representing InstrStages in MDL + +Some basic facts about InstrStages: + + + +* A single InstrStage represents either a single resource, or a disjunction of alternative resources. +* A set of associated InstrStages represents a conjunction of the individual stages - in other words an instruction uses _all_ of the stages. 
+* A stage can indicate whether the next stage occurs in the same cycle or a later cycle (this is the “timeinc” attribute) +* Each stage specifies how long the resource(s) are reserved for - independent of when the next stage occurs (the “cycles” attribute) + + +##### Handling Issue Stages + +Issue stage disjunctions of single resources are directly supported: + + +``` +InstrStage: cycles=1, func_units=[SLOT0, SLOT1], timeinc=0 +``` + + + `func_unit Adder add1() → SLOT0 | SLOT2;` + +Issue stage conjunctions of single resources are directly supported: + + +``` +InstrStage: cycles=1, func_units=[SLOT0], timeinc=0 +InstrStage: cycles=1, func_units=[SLOT1], timeinc=0 +func_unit () → SLOT0 & SLOT1; +``` + + +Issue stage conjunctions with disjunctions are converted to disjunctions of conjunctions and modeled as separate functional unit instances: + + +``` +InstrStage: cycles=1, func_units=[SLOT0 | SLOT1], timeinc=0 +InstrStage: cycles=1, func_units=[SLOT2], timeinc=0 +``` + + + `func_unit Adder add1() → SLOT0 & SLOT2;` + + +``` +func_unit Adder add2() → SLOT1 & SLOT2; +``` + + + +##### Handling Resource Reservation Specifications + +As with issue slots, resource disjunctions and conjunctions can be represented directly: + + +``` +InstrStage: cycles=1, func_units=[CVI_ST, CVI_XLANE], timeinc=0 +``` + + + `func_unit Adder add1(CVI_ST | CVI_XLANE);` + +Conjunctions with the same phase and cycles can also be combined into a single reference spec: + + +``` +InstrStage: cycles=1, func_units=[CVI_ST], timeinc=0 +InstrStage: cycles=1, func_units=[CVI_XLANE], timeinc=0 +``` + + + `func_unit Adder add1(CVI_ST & CVI_XLANE);` + +Resource conjunctions of disjunctions are simply split into separate resource specifications. Unlike with issue slots, there’s no need to generate separate functional units: + + +``` +InstrStage: cycles=1, func_units=[CVI_ST | CVI_LD], timeinc=0 +InstrStage: cycles=1, func_units=[CVI_XLANE], timeinc=-1 +``` + + + `func_unit Adder add1(CVI_ST | CVI_LD, CVI_XLANE);` + +Finally, resources that are used in different cycles are passed as separate resource specifications: + + +``` +InstrStage: cycles=1, func_units=[CVI_ST], timeinc=1 +InstrStage: cycles=1, func_units=[CVI_XLANE], timeinc=1 +InstrStage: cycles=1, func_units=[CVI_XLANE | CVI_ST], timeinc=-1 +``` + + + `func_unit Adder add1(CVI_ST, CVI_XLANE, CVI_ST | VI_XLANE);` + +Note that while disjunctions always share cycles and timeinc parameters, conjunctions can have distinct timing for each member. + +Resource disjunctions of conjunctions can’t be represented directly in itineraries. However, they can be modeled like any disjunction in MDL with separately specialized functional unit instances. + + +#### MDL Language Implementation Note + +As mentioned previously, the MDL language syntactically only supports disjunctions and conjunctions of resources. We do not currently allow resource groups to be used in conjunction or disjunction expressions, _even though they can represent conjunctions or disjunctions, and used in place of one_. + +Nor do we support more complex expressions of resources and/or resource groups. We could support this, and transparently perform the decomposition of these expressions (described earlier in the doc), but this adds complexity that we felt was, in general, unnecessary. One of the important goals of the language is to mirror a low-level interface to the actual hardware, and to not do “surprising” transformations to the description. + + +#### Implementation + +The first step is to identify issue stages. 
These are the initial stages that only reference “SLOT” resources. All the rest of the stages will be associated with functional unit instance resource specs. + +Next we split the resource specs into expressions that will be treated as separate arguments to the functional unit template. There are two rules: + + + +* All the stages associated with a single argument must be in the same pipeline phase (timeinc=0) with the same number of cycles. +* Any stage that is a disjunction is a separate argument + +Next, for the issue stages and each argument, create an object that represents the phase, cycle, and resource expression. Expand ComboFuncUnits into & expressions, and simplify if necessary. + + diff --git a/llvm/docs/Mdl/MDLCompilerDesignNotes.md b/llvm/docs/Mdl/MDLCompilerDesignNotes.md new file mode 100644 --- /dev/null +++ b/llvm/docs/Mdl/MDLCompilerDesignNotes.md @@ -0,0 +1,334 @@ + + +## Machine Description Compiler Design Notes + +Reid Tatge tatge@google.com + + +[TOC] + + + +### Introduction and Overview + +The purpose of this document is to describe the overall design and flow of the machine description compiler. (See llvm/docs/Mdl/MachineDescriptionNotes.md for a description of the language.) + +The compiler compiles the input machine description into C++ code that describes the behavior of every target instruction in enough detail to generate low-level instruction scheduling, register allocation, and bundle-packing infrastructure. It accomplishes this in phases: + + + +* Parsing the input +* Error checking, consistency checking +* Template instantiation +* Generation of an instruction database +* Generation of the C++ code + +Each of these will be discussed separately. + + +### Parsing the Input File + +The compiler uses an Antlr4-generated parser. We use the antlr visitor model to walk through the generated parse tree and generate an internal representation of the entire description in an object “MdlSpec”. + +An MdlSpec represents all the objects defined at the top level of the input machine description: + + + +* A processor family name. +* The set of processor pipeline specifications. +* The set of globally defined resources. +* The set of defined registers. +* The set of defined register classes. +* The set of CPU definitions. +* The set of function unit template definitions. +* The set of subunit template definitions. +* The set of latency template definitions. +* The set of target instruction definitions. +* The set of target operand definitions. + +The parsing phase does essentially no semantic checking. This implies that definitions of MDL objects can appear in any order, and namespaces between functional units, subunits, latencies, clusters, etc can overlap. + + +#### CPU Definitions + +A CPU definition (CpuInstance) at this point contains: + + + +* A set of “issue” resource definitions. +* A set of general resource definitions. +* A set of cluster definitions. + +Note that while the syntax allows it, we don’t describe functional unit instantiations at this level. The parser collects functional unit instances in a CPU definition, and creates a cluster to contain them. + +A cluster instantiation (ClusterInstantiation) at this point contains: + + + +* A set of “issue” resource definitions. +* A set of general resource definitions. +* A set of function unit instantiations. + + + +A functional unit instance (FuncUnitInstance) contains: + + + +* The type and name of the instance. +* A set of arguments to this instantiation of the functional unit template. 
+* A set of issue slot pinning resource references.
+
+
+#### Template Definitions
+
+There are three types of templates that are defined in a machine description.
+
+_Functional unit template_ definitions contain the following information (in FuncUnitTemplate objects):
+
+
+
+* A base functional unit template reference. A base functional unit is instantiated any time a derived functional unit is instantiated.
+* A list of template parameters, used to parameterize each instantiation of the template. Parameters can refer to either resources or register classes.
+* A list of “port” resources. Ports are resources with which we can associate register constraints and other resources that are referenced whenever the port is referenced.
+* A list of functional unit resource definitions.
+* A list of “connect” statements. Connects are used to associate register classes and resources with a port definition.
+* A list of subunit instantiations specific to this functional unit. Each subunit instance has a set of parameters which are a combination of local and parameter definitions (ports or resources).
+
+Functional unit templates are instantiated in CPU clusters, and are parameterized by the parameters passed to the instantiation. Each instance gets a unique set of declared resources and ports.
+
+_Subunit template_ definitions contain the following information (in SubUnitTemplate objects):
+
+
+
+* A base subunit template reference. The derived subunit contains all the rules from the base template.
+* A list of template parameters, used to parameterize each instantiation of the template. Subunit template parameters are reference expressions that refer to template parameters (ports or resources).
+* A list of latency instantiations. Each latency instance has a set of parameters which are references to the subunit template parameters.
+
+_Latency template_ definitions contain the following information (in LatencyTemplate objects):
+
+
+
+* A (set of) base latency template(s). When a latency template is instantiated in a subunit instance, the contents of all the base latency templates are also instantiated. Note that the inheritance can be recursive or diamond-shaped, since we only instantiate each base class once.
+* A list of template parameters, used to parameterize each instantiation of the template. Latency template parameters are reference expressions that refer to template parameters (ports or resources).
+
+
+### Error and Consistency Checking
+
+After we import the Antlr parse tree into our internal representation, we need to perform semantic error checking on the specification. The primary purpose of this stage is to ensure that there are no glaring semantic errors in the input description, so that the template instantiation and database generation stages have relatively little correctness checking to do.
+
+
+#### Building dictionaries
+
+The first task is to build lookup tables for all the different objects in the description. While we do this, we do an initial check for duplicate entries in any single table. We build dictionaries for:
+
+
+
+* Functional unit templates
+* Subunit templates
+* Latency templates
+* Operand definitions
+* Instruction definitions
+* Register class definitions
+
+The dictionaries provide a way to rapidly look up globally defined objects.
+
+
+#### Checking for duplicate definitions
+
+Once we’ve built dictionaries for the major globally defined objects, we check for duplicate definitions across all objects.
This one-time check ensures that we don’t have namespace conflicts later in the compilation. In general, we want to make sure that lists of similar objects don’t contain duplicates. + + + +* For pipeline definitions, we + * Check that each pipe class definition has a different name. + * Check that each phase name in each pipe class is unique across all pipeline definitions. +* For globally defined registers, resources, and register classes + * We make sure all of these names are different from each other. +* For any globally defined structured resource, + * Check that its member names are not duplicates. +* For register classes and operands. + * Check that the operand names and register class names are not duplicates. +* For each template (functional unit, subunit, and latency) + * Check the parameter list for duplicate names. +* For functional unit templates, + * Check that locally defined resources and ports have unique names. + * Check that locally defined structured resources have unique member names. +* Check that all CPU names are unique. + * All locally defined resources are unique, and different from globally defined resources, registers, and register classes. + * Check that cluster names are all different. + * Check that locally defined structured resources have unique names. + * For each cluster, + * Check that locally defined resources and ports have unique names, and don’t hide CPU and globally defined resources, registers, or register classes. + * Check that each functional unit instance has a different name. + * Check that locally defined structured resources have unique names. +* Check that each instruction and operand definition has a unique name +* For each instruction and operand definition, check that each operand (or suboperand) has a different name. + + +#### Instruction and operand checks + + + +* We check that each instruction refers to a defined subunit template definition. +* We check that instruction operands refer to defined operand template definitions. +* We check that operands’ suboperands refer to defined operand template definitions. +* We check that derived operands have valid base operand definitions. +* We check that operands are not recursively defined (either through base operands or suboperands). +* We check operand base classes to ensure that there are no diamond-shaped derivations. + + +#### Template and instance checking + + + +* We check that all template bases (for functional units, subunits, and latencies) refer to valid base templates, and have compatible parameter types. +* Check that all template instantiations (in CPUs, clusters, functional units, and subunits) refer to appropriately defined templates, and have the right number of template parameters. +* In this phase, we link each functional unit and subunit instance to their associated template definitions. We also link templates (of all types) to their declared bases. + + +#### Other semantic checks and miscellany + + + +* We need to ensure that each pooled resource definition that supports sharing is defined with an explicit pipeline phase. +* We check that all pipe phase references (in resource definitions and latency rules) all refer to valid pipeline phase definitions. +* We check each reference rule in each latency template: + * that references to operand’s suboperands refer to valid members + * that the phase expression is legal + +The last thing we do at this phase is to promote all globally defined resources into each CPU definition. 
This provides each CPU definition with a completely unique set of resources.
+
+
+### Template Instantiation
+
+The goal of this phase is to create, for each subunit type, a table of specialized subunit template instantiations, each of which contains the specialized instantiations of its latency instances. Each of these subunit instantiations is in turn a partially specialized template definition used in generation of sets of behaviors for each client instruction definition.
+
+This phase of compilation iterates over all CPUs and all clusters, and instantiates each functional unit instance found. It then instantiates each subunit found in each expanded functional unit instance. The critical output of this process is the subunit instantiation table, which contains a vector of specialized subunit instantiations for each subunit name. This table forms the basis for the next phase of compilation.
+
+The pseudocode for this pass is:
+
+
+```
+ for each CPU definition
+   for each cluster in the CPU
+     for each functional unit instance in the cluster
+       instantiate the functional unit;
+```
+
+
+The pseudocode for instantiating a functional unit:
+
+
+```
+ Bind incoming parameters and connect statements;
+ for each subunit instance
+   instantiate the subunit and add to the subunit table;
+ instantiate all base functional units;
+```
+
+
+The pseudocode for instantiating a subunit instance:
+
+
+```
+ Bind incoming parameters;
+ for each latency instance
+   instantiate the latency instance;
+```
+
+
+The pseudocode for instantiating a latency instance:
+
+
+```
+ Bind incoming parameters;
+ for each reference
+   specialize the reference and append to the subunit instance;
+ instantiate all base latency templates;
+```
+
+
+This is best illustrated with a simple machine description example:
+
+
+
+```
+ phases xxx { E1, E2, E3 };           // define 3 pipeline phases
+ register r[0..31];                   // define 32 registers
+ register_class LOW { r[0..15] };     // define a register class
+ register_class HIGH { r[16..31] };   // define a register class
+ register_class GPR { r[0..31] };     // define a register class
+
+ cpu cpu1 {                           // define a cpu
+   resource res1, res2, res3, res4;
+   cluster a {
+     func_unit alu alu1(res1, res2, LOW);    // instance of FU alu
+     func_unit alu alu2(res3, res4, HIGH);   // instance of FU alu
+   }
+ }
+ func_unit alu(resource a, b; class hilow) {
+   port regs;
+   connect regs to hilow via b;
+   subunit alu(a, regs);
+ }
+ subunit alu(resource x; port y) {
+   latency alu(x, y);
+ }
+ latency alu(resource x, port y) {
+   use(E1, $src, y, x);
+   def(E2, $dst, y, x);
+ }
+ instruction add(GPR dst, GPR src) { subunit(alu); }
+```
+
+
+Template instantiation works similarly to C++ template instantiation - instances are effectively replaced by specialized instantiations of the referenced template. Functional unit instantiation replaces the two functional unit instances (ie, “alu1” and “alu2” in cpu1, cluster a) with the two specialized versions of the underlying template:
+
+
+```
+ cpu cpu1 {
+   resource res1, res2, res3, res4;
+   cluster a {
+     func_unit alu alu1(res1, res2, LOW) {
+       port regs(res2);
+       subunit alu(res1, regs);
+     }
+     func_unit alu alu2(res3, res4, HIGH) {
+       port regs(res4);
+       subunit alu(res3, regs);
+     }
+   }
+ }
+```
+
+
+
+
+We then instantiate the subunits to create 2 instances for subunit “alu”, and instantiate their latency instances, which are specialized for the cpu, cluster, and functional unit they were instantiated in.
This produces:
+
+
+```
+ subunit cpu1.a.alu1.alu(resource res1; port regs[LOW] via res2) {
+   use(E1, $src, regs[LOW] via res2, res1);
+   def(E2, $dst, regs[LOW] via res2, res1);
+ }
+ subunit cpu1.a.alu2.alu(resource res3; port regs[HIGH] via res4) {
+   use(E1, $src, regs[HIGH] via res4, res3);
+   def(E2, $dst, regs[HIGH] via res4, res3);
+ }
+```
+
+
+In this example, we have two subunit instantiations which can be used to generate behaviors for instructions that use the “alu” subunit. We now know that the “add” instruction has two distinct behaviors, in terms of the functional units it can run on, the resources used, and the functional-unit-specific operand constraints.
+
+
+### Generation of the Instruction Database
+
+
+
+
+### Generation of C++ Code
+
+
+
diff --git a/llvm/docs/Mdl/MachineDescriptionNotes.md b/llvm/docs/Mdl/MachineDescriptionNotes.md
new file mode 100644
--- /dev/null
+++ b/llvm/docs/Mdl/MachineDescriptionNotes.md
@@ -0,0 +1,2543 @@
+
+
+## MPACT Microarchitecture Description Language
+
+Reid Tatge [tatge@google.com](mailto:tatge@google.com)
+
+
+[TOC]
+
+
+
+### **Goals for a Machine Description Language**
+
+Modern processors are complex: multiple execution pipelines, dynamic dispatch, out-of-order execution, register renaming, forwarding networks, and (often) undocumented micro-operations. Instruction behaviors, including micro-operations, often can’t be _statically_ modeled in an accurate way, but only _statistically_ modeled. In these cases, the compiler’s model of a microarchitecture (Schedules and Itineraries in LLVM) is effectively closer to a heuristic than a formal model. And this works quite well for general-purpose microprocessors.
+
+However, modern accelerators have different and/or additional dimensions of complexity: VLIW instruction issue, unprotected pipelines, tensor/vector ALUs, software-managed memory hierarchies. And it's more critical that compilers can precisely model the details of that complexity. Currently, LLVM’s Schedules and Itineraries aren’t adequate for directly modeling many accelerator architectural features.
+
+So we have several goals:
+
+
+
+1. We want a first-class, purpose-built, intuitive language that captures all the scheduling and latency details of the architecture - much like Schedules and Itineraries - that works well for all current targets, but also for a large class of accelerator architectures.
+2. The complexity of the specification should scale with the complexity of the hardware.
+3. The description should be succinct, avoiding duplicated information, while reflecting the way things are defined in a hardware architecture specification.
+4. We want to generate artifacts that can be used in a machine-independent way for back-end optimization, register allocation, instruction scheduling, etc - anything that depends on the behavior and constraints of instructions.
+5. We want to support a much larger class of architectures in one uniform manner.
+
+For this document (and language), the term “instructions” refers to the documented instruction set of the machine, as represented by LLVM instruction descriptions, rather than undocumented micro-operations used by many modern microprocessors.
+ + + +The process of compiling a processor’s machine description creates several primary artifacts: + + + +* For each target instruction (described in td files), we create an object that describes the detailed behaviors of the instruction in any legal context (for example, on any functional unit, on any processor) +* A set of methods with machine independent APIs that leverage the information associated with instructions to inform and guide back-end optimization passes. + +The details of the artifacts are described later in this document. + +_Note: A full language grammar description is provided in an appendix. Snippets of grammar throughout the document only provide the pertinent section of the grammar, see the Appendix A for the full grammar._ + +The proposed language can be thought of as an _optional extension to the LLVM machine description_. For most upstream architectures, the new language offers minimal benefit other than a much more succinct way to specify the architecture vs Schedules and Itineraries. But for accelerator-class architectures, it provides a level of detail and capability not available in the existing tablegen approaches. + + +#### **Background** + +Processor families evolve over time. They accrete new instructions, and pipelines change - often in subtle ways - as they accumulate more functional units and registers; encoding rules change; issue rules change. Understanding, encoding, and using all of this information - over time, for many subtargets - can be daunting. When the description language isn’t sufficient to model the architecture, the back-end modeling evolves towards heuristics, and leads to performance issues or bugs in the compiler. And it certainly ends with large amounts of target specific code to handle “special cases”. + +LLVM uses the [TableGen](https://llvm.org/docs/TableGen/index.html) language to describe a processor, and this is quite sufficient for handling most general purpose architectures - there are 20+ processor families currently upstreamed in LLVM! In fact, it is very good at modeling instruction definitions, register classes, and calling conventions. However, there are “features” of modern accelerator micro-architectures which are difficult or impossible to model in tablegen. + +We would like to easily handle: + + + +* Complex pipeline behaviors + * An instruction may have different latencies, resource usage, and/or register constraints on different functional units or different operand values. + * An instruction may read source registers more than once (in different pipeline phases). + * Pipeline structure, depth, hazards, scoreboarding, and protection may differ between family members. +* Functional units + * Managing functional unit behavior differences across subtargets of a family. + * Impose different register constraints on instructions (local register files, for example). + * Share execution resources with other functional units (such as register ports) + * Functional unit clusters with separate execution pipelines. 
+* VLIW Architecture + * issue rules can get extremely complex, and can be dependent on encoding, operand features, and pipeline behavior of candidate instructions.** \ +** + +More generally, we’d like specific language to: + + + +* Support all members of a processor family +* Describe CPU features, parameterized by subtarget + * Functional units + * Issue slots + * Pipeline structure and behaviors + +Since our emphasis is on easily supporting accelerators and VLIW processors, in addition to supporting all existing targets, much of this is overkill for most upstreamed CPUs. CPU’s typically have much simpler descriptions, and don’t require much of the capability of our machine description language. Incidentally, MDL descriptions of these targets (generated automatically from the tablegen Schedules and Itineraries) are typically much more concise than the original tablegen descriptions. + + +#### **Approach - “Subunits” and Instruction Behaviors** + +We developed a DSL that allows us to describe an arbitrary processor microarchitecture in terms that reflect what is typically documented in the hardware specification. The MDL compiler creates a database that provides microarchitecture behavior information that can _automatically _inform critical back-end compiler passes, such as instruction scheduling and register allocation, in a machine-independent way. + +It’s important to note the difference between an instruction definition, as described in LLVM, and an instruction instance. Generally, instructions defined in LLVM share the same behaviors across all instances of that instruction in a single subtarget. Exceptions to this require non-trivial code in the back-end to model variant behavior. In VLIW and accelerator architectures, each generated instance of an instruction can have different behaviors, depending on how it's issued, its operand values, the functional unit it runs on, and the subtarget. So we provide a way to model those differences in reasonable ways. + +The MDL introduces the concept of a “subunit” to abstractly represent a class of instructions with the same behaviors. Subunit instances concretely connect instructions to descriptions of their behaviors, _and_ to the functional units that they can be issued on. A subunit is vaguely analogous to collections of SchedRead and SchedWrite resources. + +Naively, we could create unique subunits for each behavior for each instruction, the set of which would enumerate the cross-product of the instruction’s behaviors on every subtarget, functional unit, and issue slot. But subunits can be specialized by subtarget, functional unit, and each instruction definition, so a single subunit definition can properly describe behaviors for sets of instructions in many different contexts. + +A key aspect of this language design is that we can explicitly represent the potentially polymorphic behavior of each generated instance of any instruction, on any functional unit, on any subtarget. The representation also comprehends that this information can vary between each of an instruction’s instances. + + + +We define a subunit as an object that defines the _behavior sets_ of an instruction instance in all legal contexts (functional units, issue slots), for each subtarget. In particular, we want to know: + + + + +* What resources are shared or reserved, in what pipeline phases. 
+ * Encoding resources + * Issue slot(s) used + * Functional unit resources + * Shared/private busses, register ports, resources, or pooled resources +* What registers are read and written, in which pipeline phases (ie, the instruction’s “latencies”) +* What additional register constraints does a functional unit instance impose on an instruction’s registers. + +The critical artifact generated by the MDL compiler is a set of instruction behaviors for each instruction definition. For each subtarget, for each instruction, we generate a list of every possible behavior of that instruction on that CPU. While this sounds daunting, in practice it's rare to have more than a few behaviors for an instruction, and most instruction definitions share their behaviors with many other instructions, across subtargets. + + +### **Overview of a Processor Family Description** + +This document generally describes the language in a bottom up order - details first. But let's start with a brief tops-down overview of what a processor family description looks like, without going into details about each part. + +A minimal processor family description has the following components: + + + +* A set of CPU definitions - one for each subtarget. +* A set of functional unit template definitions, +* A set of subunit template definitions, +* A set of latency template definitions. + +A CPU definition specifies a set of functional unit instances that define the processor, as well as pipeline descriptions, issue slot resources, and binding of functional units to issue slots. Each functional unit instance can be parameterized and specialized. + +A functional unit template specifies a set of subunits instances implemented by an instance of the functional unit. It can be parameterized and specialized for each instance in different CPUs. + +A subunit template abstractly defines a set of related operations that have similar behaviors. They specify these behaviors with a set of “latency” instances. They can also be parameterized and specialized for each instance in different functional unit templates. Subunits tie instruction definitions both to functional units on which they can execute, and instruction behaviors described in latency templates. + +A latency template defines the pipeline behavior of a set of instructions. It can be parameterized and specialized for each instance in a subunit instance. It is also specialized for each instruction that is tied to it (through a subunit). A latency rule, at a minimum, specifies when each operand is read and written in the execution pipeline. 
+ +Here’s a very simple example of a trivial CPU, with three functional units, two issue slots, and a four-deep pipeline: + + +``` + cpu myCpu { + phases cpu { E1, E2, E3, E4 }; + issue slot1, slot2; + func_unit FU_ALU my_alu1(); // an instance of FU_ALU + func_unit FU_ALU my_alu2(); // an instance of FU_ALU + func_unit FU_LOAD my_load(); // an instance of FU_LOAD + } + + func_unit FU_ALU() { // template definition for FU_ALU + subunit ALU(); // an instance of subunit ALU + } + func_unit FU_LOAD() { // template definition for FU_LOAD + subunit LOAD(); // an instance of subunit LOAD + } + + subunit ALU() { // template definition for ALU + latency LALU(); // an instance of latency LALU + } + subunit LOAD() { // template definition for LOAD + latency LLOAD(); // an instance of latency LLOAD + } + + latency LALU() { // template definition for LALU + def(E2, $dst); use(E1, $src1); use(E1, $src2); + } + latency LLOAD() { // template definition for LLOAD + def(E4, $dst); use(E1, $addr); + } +``` + + +A more complete description of each part of this description is provided in the section “Defining a Processor Family”. + +**Defining an ISA** + +We need to map a microarchitecture model back to LLVM instruction, operand, and register definitions. So, the MDL contains constructs for defining instructions, operands, registers, and register classes. + +When writing a target machine description, its not necessary to write descriptions for instructions, operands, and registers - we scrape all of this information about the CPU ISA from the tablegen output as part of the build process, and produce an MDL file which contains these definitions. The machine description compiler uses these definitions to tie architectural information back to LLVM instructions, operands, and register classes. + +We will describe these language features here, primarily for completeness. + + +#### **Defining Instructions** + +Instruction definitions are scraped from tablegen files, and provide the following information to the MDL compiler for each instruction: + + + +* The instruction’s name (as defined in the td files) +* Its operands, with the operand type and name provided in the order they are declared, and indicating whether each is an input or output of the instruction. +* A set of “legal” subunit definitions (a “subunit” is described later in this document) +* An optional list of instructions derived from this one. + +As in tablegen, an operand type must be either an operand name defined in the td description, a register class name defined in the td description, or simply a defined register name. If the operand type is a register name, the operand name is optional (and ignored) (these register operands are used to represent implied operands in LLVM instructions). + +Grammar: + + +``` + instruction_def : 'instruction' IDENT + '(' (operand_decl (',' operand_decl)*)? ')' + '{' + ('subunit' '(' name_list ')' ';' )? + ('derived' '(' name_list ')' ';' )? + '}' ';'? ; + operand_decl : ((IDENT (IDENT)?) | '...') ('(I)' | '(O)')? ; +``` + + +An example: + + +``` + instruction ADDSWri(GPR32 Rd(O), GPR32sp Rn(I), addsub_shifted_imm32 imm(I), NZCV(O)) { + subunit(sub24,sub26); + } +``` + + +This describes an ARM add instruction that has two defined input operands (Rn, imm), one defined output operand (Rd), and one implicit output operand (NZCV), which is associated with two subunits (sub24, sub26). 
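+
+The grammar above also allows an optional “derived” clause that lists instructions derived from this one. The sketch below shows how such a definition might look - the instruction, operand, and subunit names here are purely illustrative and are not taken from a real target description:
+
+
+```
+ instruction LDRWui(GPR32 Rt(O), GPR64sp Rn(I), uimm12s4 offset(I)) {
+   subunit(sub12, sub15);   // subunits this instruction can be issued on
+   derived(LDRWui_alt);     // hypothetical instruction derived from this one
+ }
+```
+
+
+As with the ADDSWri example above, each operand is marked as an input (I) or output (O) of the instruction.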
+ + +#### **Defining Operands** + +Operand definitions are scraped from tablegen files (like instructions), and provide the following information to the MDL compiler for each operand: + + + +* The operand’s name, +* Its sub-operands, with the operand type and operand name provided in the order they are declared. Note that operand names are optional, and if not present we would refer to these by their sub-operand id (0, 1, etc), +* The operand’s value type. + +As in LLVM, an operand definition’s sub-operand types may in turn refer to other operand definitions. (Note that operand’s sub-operands are declared with the same syntax as instruction operands.) + +Grammar: + + +``` + operand_def : 'operand' IDENT + '(' (operand_decl (',' operand_decl)*)? ')' + '{' operand_type '}' ';'? ; +``` + + +Some examples: + + +``` + operand GPR32z(GPR32 reg) { type(i32); } + operand addsub_shifted_imm32(i32imm, i32imm) { type(i32); } +``` + + + +#### **Defining Registers and Register Classes** + +Registers and register classes are scraped from tablegen output. We provide a general method in the language to define registers and classes of registers which can reflect the registers defined in tablegen. + +Grammar: + + +``` + register_def : 'register' register_decl (',' register_decl)* ';' ; + register_decl : IDENT ('[' range ']')? ; + register_class : 'register_class' IDENT + '{' register_decl (',' register_decl)* '}' ';'? + | 'register_class' IDENT '{' '}' ';'? ; +``` + + +Examples: + + +``` + register a0, a1, a2, a3; // 4 registers + register a[4..7]; // definition of a4, a5, a6, and a7 + + register_class low3 { a0, a1, a2 }; // a class of 3 registers + register_class high5 { a[3..7] }; // a class of a3, a4, a5, a6, and a7 +``` + + +The order of register definitions is generally insignificant in the current MDL - we use the register names defined in LLVM, and there’s no cases in the MDL where we depend on order. Register “ranges”, such as “a[0..20]” are simply expanded into the discrete names of the entire range of registers. + + +#### **Defining Derived Operands** + +LLVM doesn’t necessarily provide all the information we want to capture about an instruction, so the MDL allows for defining “derived” operands with which we can associate named values. A derived operand is essentially an alias to one or more LLVM-defined operands (or derived operands), and provides a mechanism to add arbitrary attributes to operand definitions. Derived operands also allow us to treat a set of operand types as identical in latency reference rules (so you don’t have to specify a long set of operand types for some references.) + +Grammar: + + +``` + derived_operand_def : 'operand' IDENT (':' IDENT)+ ('(' ')')? + '{' (operand_type | operand_attribute)* '}' ';'? ; + operand_attribute_stmt : 'attribute' IDENT '=' (snumber | tuple) + ('if' ('lit' | 'address' | 'label') +``` + + + + ` ('[' pred_value (',' pred_value)* ']' )? )? ';' `; + + +``` + pred_value : snumber + | snumber '..' snumber + | '{' number '}' ; + tuple : '[' snumber (',' snumber)* ']' ; +``` + + + +##### **Derivation** + +Each derived operand is declared with one or more “base” operands, for which it is an alias. Circular or ambiguous derivations are explicitly disallowed - there must be only one derivation path for a derived operand to any of its base concrete operands. + +Derived operands are used in place of their base operands in operand latency rules in latency templates (described later). 
This allows a rule to match a set of operands, rather than a single operand, and can also give the latency rule access to instruction attributes.
+
+
+##### **Derived operand attributes**
+
+Derived operand attributes associate name/value-tuple pairs with the operand type. Tuples are appropriate when an attribute is used as a set of masks for resource sharing, described later.
+
+Some examples:
+
+
+```
+ attribute my_attr_a = 1;
+ attribute my_attr_b = 123;
+ attribute my_tuple = [1, 2, 3];
+```
+
+
+Attributes can have predicates that check if the operand contains a data address, a code address, or any constant. Additionally, attributes can have multiple definitions with different predicates, with the first “true” predicate determining the final value of the attribute for that operand instance:
+
+
+```
+ attribute my_attr = 5 if address; // if operand is a relocatable address
+ attribute my_attr = 2 if label;   // if operand is a code address
+ attribute my_attr = 3 if lit;     // if operand is any literal constant
+```
+
+
+Predicates for literal constants can also take an optional list of “predicate values”, where each predicate value is either an integer, a range of integers, or a “mask”. Mask predicate values explicitly check for non-zero bits:
+
+
+```
+ attribute my_attr = 5 if lit [1, 2, 4, 8];      // looking for specific values
+ attribute my_attr = 12 if lit [100..200];       // looking for a range of values
+ attribute my_attr = 1 if lit [{0x0000FFFF}];    // looking for a 16-bit number
+ attribute my_attr = 2 if lit [{0x00FFFF00}];    // also a 16-bit number!
+ attribute my_attr = 3 if lit [1, 4, 10..14, 0x3F800000, {0xFF00FF00}];
+```
+
+
+Note that we explicitly don’t directly support floating point numbers: this should be done instead with specific bit patterns or masks. This avoids problems with floating point precision and format differences across systems:
+
+
+```
+ attribute my_attr = 1 if lit [0xBF800000, 0x402DF854]; // -1.0, or e
+ attribute my_attr = 2 if lit [{0x7FFF0000}];           // a positive BF16 number
+```
+
+
+If all of an attribute’s predicates are “false” for an instance of an operand, the compiler recursively checks the attribute’s value in each of the operand’s bases until it finds a true predicate (or an unpredicated attribute):
+
+
+```
+ operand i32imm() { type(i32); }   // scraped from llvm td file.
+
+ operand huge_imm : i32imm() {
+   attribute size = 3;
+ }
+ operand medium_imm : huge_imm() {
+   attribute size = 2 if lit [-32768..32767];
+ }
+ operand small_imm : medium_imm() {
+   attribute size = 1 if lit [0..16];
+ }
+```
+
+
+
+##### **Derived operand attribute usage**
+
+There is currently only a single context in which instruction attributes are used directly in the machine description, as part of resource references in latency rules (see “latency\_resource\_ref”). In this context, you can specify an attribute name which provides the number of resources needed for a resource allocation, and the mask used to determine shared operand bits associated with the resource. An example:
+
+
+```
+ … my_resource:my_size_attribute:my_mask_attribute …
+```
+
+
+This resource reference uses the attributes from the operand associated with this reference to determine how many resources to allocate, and what bits in the operand to share.
+
+
+### **Overview of Resources**
+
+Resources are used to abstractly describe hardware constructs that are used by an instruction in its execution.
They can represent:
+
+
+
+* functional units,
+* issue slots,
+* register ports,
+* shared encoding bits,
+* or any other hardware resource an instruction uses when it executes that could impact the instruction’s behavior (such as pipeline hazards).
+
+It’s important to note that different instances of an instruction can use completely different resources depending on which functional unit, and which subtarget, it's issued on. The MDL has an explicit way to model this.
+
+The machine description provides a mechanism for defining and associating resources with the pipeline behaviors of instructions through the specialization of functional unit templates, subunit templates, and latency templates. It also allows automatic allocation of shared resources for an instruction instance from resource pools. The MDL compiler generates behavior descriptions which explicitly reference each resource (or resource pool) the instruction uses, and in what pipeline phases. This provides a direct methodology for managing instruction issue and pipeline behaviors such as hazards.
+
+
+#### Defining Resources
+
+There are a few ways that resources are defined:
+
+
+
+* **Functional Units:** A resource is implicitly defined for every functional unit instance in a CPU definition. An instruction that executes on a particular instance will reserve that resource implicitly.
+* **Issue Slots:** Each CPU, or cluster of functional units in a CPU, can explicitly define a set of issue slots. For a VLIW, these resources directly correspond to instruction encoding slots in the machine instruction word, and can be used to control which instruction slots can issue to which functional units. For dynamically scheduled CPUs, these correspond to the width of the dynamic instruction issue.
+* **Named Resources** can be explicitly defined in several contexts, described below.
+* **Ports:** Ports are functional unit resources that model a register class constraint and a set of associated resources. These are intended to model register file ports that are shared between functional units.
+
+Explicitly defined resources have scope - they can be defined globally (and apply to all CPU variants), within a CPU, within a cluster, or within a functional unit template. Intuitively, shared resources are typically defined at higher levels in the machine description hierarchy. Resources and ports defined within a functional unit template are replicated for each instance of that functional unit. “Issue” resources are defined in CPU and cluster instances.
+
+Named resource definitions have the following grammar:
+
+
+```
+ resource_def : 'resource' ('(' IDENT ')')?
+ resource_decl (',' resource_decl)* ';' ;
+ resource_decl : IDENT (':' number)? ('[' number ']')?
+ | IDENT (':' number)? '{' name_list '}'
+ | IDENT (':' number)? '{' group_list '}' ;
+
+ port_def : 'port' port_decl (',' port_decl)* ';' ;
+ port_decl : IDENT ('<' IDENT '>')? ('(' resource_refs ')')? ;
+ issue_resource : 'issue' ('(' IDENT ')')? name_list ';' ;
+```
+
+
+
+##### Simple resource definitions
+
+The simplest resource definition is simply a comma-separated list of names:
+
+
+```
+ resource name1, name2, name3;
+```
+
+
+A resource can also have an explicit pipeline stage associated with it, indicating that the defined resources are always used in the specified pipeline phase:
+
+
+```
+ resource(E4) name1, name2;   // define resources that are always used in E4
+```
+
+
+A resource can have a set of bits associated with it. This defines a resource that can be shared between two references if the bits in an associated operand reference are identical.
+
+
+```
+ resource immediate:8;   // define a resource with 8 bits of data
+```
+
+
+##### Grouped resource definitions
+
+We can declare a set of named, related resources:
+
+
+```
+ resource bits { bits_1, bits_2, bits_3 };
+```
+
+
+A resource group typically represents a pool of resources that are shared between instructions executing in parallel, where an instruction may require one or all of the resources. This is a common attribute of VLIW architectures, and is used to model things like immediate pools and register ports.
+
+Any defined resource can be included in a group, and the order of the members of a group is significant when members are allocated. If a group mentions an undefined resource (in either the current or enclosing scope), the member is declared as a resource in the current scope. In the case above, if the members (bits\_1, etc) are not declared, the compiler would create the definition:
+
+
+```
+ resource bits_1, bits_2, bits_3;
+```
+
+
+and the group members would refer to these definitions. (Note: we don’t support nested groups.)
+
+The resource group can be referenced by name, referring to the entire pool, or by individual members, such as “bits.bits\_2” to specify the use of a specific pooled resource. Consider the following example:
+
+
+```
+ resource bits_1, bits_2, bits_3;
+
+ resource bits_x { bits_1, bits_2, bits_3 };
+ resource bits_y { bits_3, bits_1, bits_2 };
+```
+
+
+“bits\_x” and “bits\_y” are distinct groups that reference the same members, but members are allocated in a different order. Groups can also be defined with syntax that indicates how their members are allocated by default:
+
+
+```
+ resource bits_or { bits_1 | bits_2 | bits_3 };    // allocate one of these
+ resource bits_and { bits_1 & bits_2 & bits_3 };   // allocate all of these
+```
+
+
+Groups can also be implicitly defined in functional unit and subunit template instantiations as a resource parameter:
+
+
+```
+ func_unit func my_fu(bits_1 | bits_2 | bits_3);
+```
+
+
+This implicitly defines a resource group with three members, and passes that group as a parameter of the instance.
+
+
+##### Pooled resource definitions
+
+We can also declare a set of “unnamed” pooled resources:
+
+
+```
+ resource shared_bits[0..5];
+```
+
+
+This describes a resource pool with 6 members. The entire pool can be referenced by name (ie “shared\_bits”), or each member can be referenced by index (“shared\_bits[3]”), or a subrange of members (“shared\_bits[2..3]”). A resource reference can also indicate that it needs some number of resources allocated, with the syntax “shared\_bits:N”, where N is the number of members needed.
+
+Resource pools can also have data associated with them; each member has its own set of bits:
+
+
+```
+resource bits:20 { bits_1, bits_2, bits_3 };
+resource shared_bits:5[6];
+```
+
+
+Resource pools, like resource groups, are used to model things like shared encoding bits and shared register ports, where instructions need one or more members of a set of pooled resources.
+
+Finally, resource definitions can pin a resource to a particular pipeline phase. All references to that resource will be automatically modeled only at that pipeline stage. This is particularly useful for modeling shared encoding bits (typically for resource pools). The syntax for that looks like:
+
+
+```
+ resource(E1) my_pool { res1, res2, res3 };
+```
+
+
+where E1 is the name of a pipeline phase.
The resource “my\_pool” (and each of its elements) is always modeled to be reserved in pipeline phase E1. + + +#### **Using Resources** + +Resource references appear in several contexts. They are used in all template instantiations to specialize architecture templates (functional units, subunit, or latency templates) and are ultimately used in latency rules to describe pipeline behaviors. These will be described later in the document. + +When used to specialize template instances, resource references have the following grammar: + + +``` + resource_ref : IDENT ('[' range ']')? + | IDENT '.' IDENT + | IDENT '[' number ']' + | IDENT ('|' IDENT)+ + | IDENT ('&' IDENT)+ ; +``` + + +Some examples of resource uses in functional unit instantiations, subunit instantiations, latency instantiations, and latency reference rules: + + +``` +some_resource // reference a single resource or an entire group/pool +some_resource_pool[1] // use a specific member from an unnamed pool. +register_ports[6..9] // select a subset of unnamed pooled resources. +group.xyzzy // select a single named item from a group. +res1 | res2 | res3 // select one of these resources +res6 & res7 & res8 // select all of these resources +``` + + +References in latency reference rules have additional syntax to support the allocation of resources from groups and pools: + + +``` + latency_resource_ref : resource_ref ':' number (':' IDENT)? + | resource_ref ':' IDENT (':' IDENT)? + | resource_ref ':' ':' IDENT + | resource_ref ':' '*' + | resource_ref ; +``` + + + +##### **Allocating Grouped and Pooled Resources** + +Latency references allow you to optionally manage allocation of pooled resources, as well as specifying the significant bits of operands whose values can be shared with other instructions. + +A reference of the form: + + +``` + some_resource_pool:1 +``` + + +indicates that a reference needs one element from a group/pooled resource associated with a latency reference. A reference of the form: + + +``` + some_resource_pool:2 +``` + + +indicates that the reference needs 2 (or more) _adjacent_ elements from a pooled resource associated with a latency reference. A reference of the form: + + +``` + some_resource_pool:* +``` + + +indicates that a reference needs _all _elements from a resource group or pool. Note that grouped resources can only use :1 and :\*. + +A reference of the form: + + `some_resource_pool:size` + +indicates an operand reference that requires some number of resources from the resource pool. The number of resources needed is specified in the “size” attribute of the associated operand type. This enables us to decide at compile time how many resources to allocate for an instruction’s operand based on its actual value. For example, large operand constant values may require more resources than small constants, while some operand values may not require any resources. There’s a specific syntax for describing these attributes in derived operand definitions (described earlier). + +In the examples above, if the resource has shared bits associated with it (it’s shareable by more than one instruction), the entire contents of the operand are shared. In some cases, only part of the operand’s representation is shared, and we can can specify that with the following reference form: + + `some_resource_pool:size:mask` + +This indicates that the associated operand’s “mask” attribute indicates which of the operand bits are sharable. 
Finally, we can use a share-bits mask without allocation: + + `some_resource_pool::mask` + +This reference utilizes the resource - or an entire pool - and uses the operand’s “mask” attribute to determine which bits are shared with other references. + +We will describe how these references work when we describe latency rules. + + +### **Defining a Processor Family** + +A TableGen description describes a family of processors, or subtargets, that share instruction and register definitions. Information about instruction behaviors are described with Schedules and Itineraries. The MDL also uses common instruction and register descriptions, scraped from TableGen, and adds first-class descriptions of CPUs, functional units, and pipeline modeling. + +In an MDL CPU description, a CPU is described as an explicit set of functional units. Each functional unit is tied to a set of subunits, and subunits are in turn explicitly tied to instruction definitions and pipeline behaviors. There are two approaches for associating subunits with functional units, and the choice of which one to use is dependent on the attributes of the architecture you’re describing: + + + +1. Subunit templates specify (either directly or through Latencies) which functional units they use, or +2. You define functional unit templates that specify exactly which subunits they use. + +More detail on this below. + + +#### **Method 1: SuperScalar and Out-Of-Order CPUs** + +Fully protected pipelines, forwarding, out-of-order issue and retirement, imprecise micro-operation modeling, and dynamic functional unit allocation make this class of + +CPUs difficult to model_ precisely._ However, because of their dynamic nature, precise modeling is both impossible and unnecessary. But it is still important to provide descriptions that enable scheduling heuristics to understand the relative temporal behavior of instructions. + +This method is similar to the way Tablegen “Schedules” associate instructions with a set of ReadWrite resources, which are in turn associated with sets of ProcResources (or functional units), latencies and micro-operations. This approach works well for superscalar and out-of-order CPUs, and can also be used to describe scalar processors. + +The upside of this method is that you don’t need to explicitly declare functional unit templates. You simply declare CPU instances of the functional units you want, and the MDL compiler creates implicit definitions for them. + +The downside of this method is that you can’t specialize functional unit instances, which in turn means you can’t specialize subunit instances, or associated latency instances. Fortunately, specialization generally isn’t necessary for this class of CPUs. It would also be difficult to use this method to describe a typical VLIW processor (which is why we have method 2!). + +We generally describe this as a “bottoms-up” approach (subunits explicitly tying to functional unit instances), and is the approach used by the Tablegen scraper (tdscan) for “Schedule-based” CPUs. + + +#### **Method 2: VLIWs, and everything else** + +This method is appropriate for machines where we must provide more information about the detailed behavior of an instruction so that we can correctly model its issuing and pipeline behavior. It is particularly important for machines with deep, complex pipelines that _must_ be modeled by the compiler. It has a powerful, flexible user-defined resource scheme which provides a lot more expressiveness than either “Schedules” or “Itineraries”. 
+ +In this method, a functional unit instance is an instantiation of an _issuing_ functional unit, which is more typical of scalar and VLIW CPUs. In the common case where different instances of a functional unit have different behaviors, we can easily model that using functional unit, subunit, and latency instance specialization, and more detailed latency rules. + +This approach allows a very high degree of precision and flexibility that's not available with method 1. Its strictly more expressive than the first method, but much of that expressiveness isn’t required by superscalar CPUs. + +We describe this as a “tops-down” approach (explicit functional unit template definitions + +assert which subunits they support). This is the method tdscan uses when scraping information about itineraries. + + +#### **Schema of a Full Processor Family Description** + + By convention, a description generally describes things in the following order (although the order of these definitions doesn’t matter): + + + +* Definition of the family name. +* Describe the pipeline model(s). +* Describe each CPU (subtarget) in terms of functional unit instances. +* Describe each functional unit template in terms of subunit instances (tops-down approach) +* Describe each subunit template type. A subunit represents a class of instruction definitions with similar execution behaviors, and ties those instructions to a latency description. +* Describe each latency in terms of operand and resource references. + +We will describe each of these items in more detail. A machine description for a target has the following general schema: (a full syntax is provided in Appendix A) + + +``` + + + + + + // Define CPUs + cpu gen_1 { + + + + … + } + cpu gen_2 { … } + … + + // Define Functional Unit Template Definitions (Tops-down approach) + func_unit a_1() { + + + + … +} + func_unit b_1(…) { … } + … + + // Define Subunit Template Definitions + subunit add() { + + + … + } + subunit mul(…) { … } + … + + // Latency Template Definitions + latency add() { + + + … + } + latency mul(…) { … } + … + + // Instruction information scraped from Tablegen description + + + + +``` + + + +##### **Bottoms-up vs Tops-down CPU Definition Schemas \ +** + +In the “tops-down” schema, we define CPUs, which instantiate functional units, which instantiate subunits, which instantiate latencies. At each level of instantiation, the object (functional unit, subunit, latency) can be specialized for the context that it’s instantiated in. We think of this as a “top-down” definition of a processor family. We provide detailed descriptions for each functional unit template, which we can specialize for each instance. + +However, for many processors, this specialization is unnecessary, and the normal schema is overly verbose. For these kinds of processors, we can use the “bottoms-up” schema. + +In this schema, the MDL compiler _implicitly_ creates functional unit and latency templates: + + + +* A CPU definition specifies which functional units are used in the normal syntax. +* Subunits directly implement latency rules inline (rather than instantiate a latency template), including an explicit functional unit instance that they can execute on. 
+
+Here’s an example of this kind of bottom-up description:
+
+
+```
+ cpu dual_cpu {
+   func_unit ALU alu1();   // an "ALU" functional unit, named "alu1"
+   func_unit ALU alu2();   // an "ALU" functional unit, named "alu2"
+ }
+ subunit alu2() {{ def(E2, $dst); use(E1, $src); fus(ALU, 3); }}
+ subunit alu4() {{ def(E4, $dst); use(E1, $src); fus(ALU, 7); }}
+ subunit alu7() {{ def(E7, $dst); use(E1, $src); fus(ALU, 42); }}
+```
+
+
+Note that we don’t explicitly define the ALU functional unit template, but it is instantiated (twice) and used in three subunit/latency templates. Similarly, we don’t explicitly define the three latency templates. Both the functional unit template and the latency templates are implicitly created by the MDL compiler.
+
+While this schema is much more compact, neither the functional units nor the subunits/latencies can be specialized. This is an appropriate approach for scalar and superscalar processors, and is used by tdscan for CPUs that use Tablegen Schedules.
+
+
+#### **Specifying the Family Name**
+
+A family name must be specified that ties the description to the LLVM name for the processor family. It has the following grammar:
+
+
+```
+family_name : 'family' IDENT ';' ;
+```
+
+
+
+#### **Pipeline Definitions**
+
+We don’t explicitly define instruction “latencies” in the MDL. Instead, we specify when instructions’ reads and writes happen in terms of pipeline phases. From this, we can calculate actual latencies. Rather than specify pipeline phases with numbers, we provide a way of naming pipeline stages, and refer to those stages strictly by name. A pipeline description has the following grammar:
+
+
+```
+ pipe_def : protection? 'phases' IDENT '{' pipe_phases '}' ';'? ;
+ protection : 'protected' | 'unprotected' | 'hard' ;
+ pipe_phases : phase_id (',' phase_id)* ;
+ phase_id : '#'? IDENT ('[' range ']')? ('=' number)? ;
+```
+
+
+For example:
+
+
+```
+phases my_pipeline { fetch, decode, read1, read2, ex1, ex2, write1, write2 };
+```
+
+
+We typically define these in a global phase namespace, and they are shared between CPU definitions. All globally defined phase names must be unique. However, each CPU definition can have private pipeline definitions, and names defined locally override globally defined names.
+
+You can define more than one pipeline, and each pipeline can have the attribute “protected”, “unprotected”, or “hard”. “Protected” is the default if none is specified.
+
+
+```
+protected phases alu { fetch, decode, ex1, ex2 };
+unprotected phases vector { vfetch, vdecode, vex1, vex2 };
+hard phases branch { bfetch, bdecode, branch };
+```
+
+
+A “protected” pipeline describes a machine where the hardware manages latencies between register writes and reads by injecting stalls into a pipeline when reads are issued earlier than their inputs are available, or resources are oversubscribed (pipeline hazards). Most modern general-purpose CPUs have protected pipelines, and in the MDL language this is the default behavior.
+
+An “unprotected” pipeline never inserts stalls for read-after-write dependences or pipeline hazards. In this type of pipeline, reads fetch whatever value is in the register (in the appropriate pipeline phase). A resource conflict (hazard) results in undefined behavior (ie, the compiler must avoid hazards!). In this model, if an instruction stalls for some reason, the entire pipeline stalls. This kind of pipeline is used in several DSP architectures.
+ +A “hard” latency typically describes the behavior of branch and call instructions, whose side effect occurs at a particular pipeline phase. The occurrence of the branch or call always happens at that pipeline phase, and the compiler must accommodate that (by inserting code in the “delay slots” of the branch/call). + +You can define multiple stages as a group - the following rule is equivalent to the first example above. + +**phases alu { fetch, decode, read[1..2], ex[1..2], write[1..2] }; \ + + +Like C enumerated values, each defined name is implicitly assigned an integer value, starting at zero and increasing sequentially, that represents its integer stage id. You can explicitly assign values to pipeline phases, as in C, with the syntax “`phase=value`”. You can also explicitly assign sequential values to a range, by using the syntax `"name[2..5]=value`”. + +Finally, there is specific syntax to annotate the first “execute” phase in a pipeline spec, using the ‘#’ syntax: + + +``` +phases my_pipeline { fetch, decode, #read1, read2, ex1, ex2, write1, write2 }; +``` + + +This indicates that “read1” is the first execute stage in the pipeline, which serves as the “default” phase for any operand that isn’t explicitly described in a latency rule. + + +#### **CPU Definitions** + +In the definition of a single CPU/subtarget, we specify a high-level description of the processor: + + + +* CPU-specific resource definitions +* Specification of issue slots and issue-slot usage. +* Specialized instances of available functional units, and/or clusters of functional units. +* CPU-specific pipeline definitions. + +Note that a CPU definition does not attempt to describe the pipeline behavior of functional units, but only specifies which functional units are implemented. The _behavior_ of functional units, and instructions that run on them, are explicitly described in the functional unit _templates._ + +Grammar: + + +``` + cpu_def : 'cpu' IDENT ('(' STRING (',' STRING)* ')')? + '{' cpu_stmt* '}' ';'? ; + cpu_stmt : pipe_def + | resource_def + | reorder_buffer_def + | issue_statement + | cluster_instantiation + | func_unit_instantiation + | forward_stmt ; + + reorder_buffer_def : 'reorder_buffer' '<' number '>' ';' ; + + cluster_instantiation : 'cluster' IDENT '{' cluster_stmt+ '}' ';'? ; + cluster_stmt : resource_def + | issue_statement + | func_unit_instantiation + | forward_stmt ; + + issue_statement : 'issue' '(' IDENT ')' name_list ';' ; + +func_unit_instantiation : 'func_unit' func_unit_instance func_unit_bases* + IDENT '(' resource_refs? ')' + ('->' (pin_one | pin_any | pin_all))? ';' + +func_unit_instance : IDENT ('<>' | ('<' number '>'))? + func_unit_bases : ':' func_unit_instance + + pin_one : IDENT ; + pin_any : IDENT ('|' IDENT)+ ; + pin_all : IDENT ('&' IDENT)+ ; +``` + + +The overall schema of a CPU definition looks like this: + + +``` + cpu gen_1 { + + + + + + + … + } +``` + + +and a cluster definition has the schema: + + `cluster xyz { ` + + +``` + + + + + + … + } +``` + + +Below are some examples of increasingly complex CPU definitions. + + +##### **Simple Scalar CPU Definition** + +In the simplest case, an empty CPU indicates a processor with no specific functional unit information. 
We assume a serial execution of instructions, with “default” latencies: + + +``` + cpu simplest_cpu { } +``` + + +A single-alu CPU that has a scheduling model looks like this: + + +``` + cpu simple_cpu { + func_unit my_alu alu(); // a "my_alu" functional unit, named "alu" + } +``` + + +A slightly more complex example is a CPU that is single-issue, but has more than one execution pipeline: + + +``` + cpu dual_alu_cpu { + issue slot0; // a single issue pipeline + func_unit my_alu alu1(); // a "my_alu" functional unit, named "alu1" + func_unit my_alu alu2(); // a "my_alu" functional unit, named "alu2" + } +``` + + + +##### **Multi-Issue CPUs** + +Here’s an example of a 2-issue processor with two identical functional units: + + +``` + cpu dual_issue_cpu { + func_unit my_alu alu1(); // a "my_alu" functional unit, named "alu1" + func_unit my_alu alu2(); // a "my_alu" functional unit, named "alu2" + } +``` + + +Processors commonly have functional units with different capabilities - memory units, multipliers, floating point units, etc. The following is a four-issue CPU with 4 different types of functional units. + + +``` + cpu quad_cpu { + func_unit int_math imath(); // a "int_math" functional unit + func_unit float_math fmath(); // a "float_math" functional unit + func_unit memory mem(); // a "memory" functional unit + func_unit branch br(); // a "branch" functional unit + } +``` + + + +##### **Defining Issue Slots** + +Multi-issue CPUs always have a constrained set of instructions they can issue in parallel. For superscalar, OOO processors this is generally tied to the number of issue pipelines that are available. For VLIW, issue slots map directly to encoding bits in a parallel instruction. In the MDL, you can explicitly define issue slots. An example: + + +``` + cpu tri_cpu { + issue slot0, slot1; + func_unit my_alu alu1(); // a "my_alu" functional unit, named "alu1" + func_unit my_alu alu2(); // a "my_alu" functional unit, named "alu2" + func_unit my_alu alu3(); // a "my_alu" functional unit, named "alu3" + } +``` + + +In this example, we have 3 functional units, but only two issue slots. So any of the three functional units can issue in either issue slot, but only two can be issued in parallel. + +When issue slots are not specified, each functional unit runs in its own dedicated issue slot. + + +##### **Reservation of Issue Slots** + +In VLIW architectures (in particular), some functional units may be “pinned” to a specific set of issue slots, or use multiple issue slots in some cases. We provide syntax for specifying this: + + +``` + cpu three_issue_quad_cpu { + issue s0, s1, s2; + func_unit int_math alu1() -> s0; // alu1 must issue in s0 + func_unit float_math alu2() -> s1 | s2; // alu2 must be in s1 or s2 + func_unit memory alu3() -> s0 & s1; // alu3 uses both s0 and s1 + func_unit branch br(); // branches can run in any slot + } +``` + + + +##### **SuperScalar and Out-Of-Order CPUs** + +In general, the overall approach for defining superscalar CPUs is quite different from other CPU types. This class of architecture requires information about the size of the reorder buffer, and details about queues for each functional unit. Actual functional unit utilization is described in latency or subunit rules, which can specify exactly which functional units are used. + +Functional units can be unreserved (like alu1, below), which means that an instruction or micro-operation that runs on that unit doesn’t actually use that specific resource. 
A functional unit can have a single-entry queue - in which case it is unbuffered - or a specific size queue. + + +``` + cpu three_issue_superscalar_cpu { + issue s0, s1, s2; + reorder_buffer<20>; // the reorder buffer is size 20 + func_unit int_math<> alu1(); // alu1 is unreserved + func_unit float_math<10> alu2(); // alu2 has 10 queue entries + func_unit memory<20> alu3(); // alu3 has 20 queue entries + func_unit branch br(); // branch has a single entry + } +``` + + + +##### **Parameterized/Specialized Functional Unit Instances** + +A functional unit template can be parameterized with register classes and resource references so that each instance of that functional unit template can be specialized for a specific context. The actual use of these parameters is specified in the functional unit template, explained in the following sections. This section describes template specialization parameters. + +A **register class parameter** asserts that the functional unit instance may impose a register constraint on instructions that execute on it. This constraint is an addition to the register class constraints specified by an instruction’s operand definitions. This enables us to model functional units that are connected to a subset - or a partition - of a register file. It can also be used to describe functional-unit-local register files. Finally, it can disqualify instructions from running on a functional unit if they have register operands or operand constraints that are incompatible with the functional unit constraints. + + +``` + register r[0..31]; + register_class ALL { r[0..31] }; + register_class LOW { r[0..15] }; + register_class HI { r[16..31] }; + + cpu my_cpu { + func_unit my_alu alu0(LOW); // instructions use r0..r15 + func_unit my_alu alu1(HI); // instructions use r16..31 + } + instruction add(ALL dst, ALL src1, ALL src2) { … } +``` + + +A **resource parameter** indicates that instructions that execute on the functional unit may use that resource or a member of a resource pool. This is generally used to specify how shared resources are used across functional unit instances. + + +``` + cpu my_cpu { + resource shared_thing; // a single shared resource + resource reg_ports { p1, p2, p3 }; // three associated resources + resource shared_stuff[20]; // 20 associated resources + + func_unit math alu0(shared_thing); // share a named resource + func_unit math alu1(reg_ports.p1); // share one member of a group + func_unit math alu2(shared_stuff[12]); // share one member of a pool + + func_unit mem mem0(reg_ports); // share an entire group + func_unit mem mem1(shared_stuff); // share an entire pool + func_unit mem mem2(shared_stuff[3..14]); // share part of a pool + } +``` + + + +##### **Functional Unit Clusters** + +A processor definition can include named “clusters” of functional units. Each cluster can define local resources, and define its own issue rules. The purpose of clusters is primarily as a syntactic convenience for describing processors with functional unit clusters. An example: + + **cpu my_cpu { + + +``` + cluster A { + issue a, b; + func_unit my_alu alu1(); + func_unit my_alu alu2(); + func_unit my_alu alu3(); + } + cluster B { + issue a, b; + func_unit my_alu alu1(); + func_unit my_alu alu2(); + func_unit my_alu alu3(); + } + } + + +``` + + +This describes a 4-issue machine, where 2 instructions can be issued on each cluster per cycle. 
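+
+Clusters can combine the other features described above - cluster-local resources, issue slots, and pinned or specialized functional unit instances. Here is a hypothetical sketch (the unit, slot, and resource names are made up for illustration, and the issue statements follow the shorthand style used in the examples above):
+
+
+```
+  cpu clustered_cpu {
+    cluster A {
+      issue a0, a1;
+      resource a_port[2];                       // cluster-local resource pool
+      func_unit my_alu alu1(a_port[0]) -> a0;   // pinned to issue slot a0
+      func_unit my_mem mem1(a_port[1]) -> a1;   // pinned to issue slot a1
+    }
+    cluster B {
+      issue b0, b1;
+      func_unit my_alu alu1();
+      func_unit my_mem mem1();
+    }
+  }
+```
+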
+
+
+##### **Defining Compound Functional Unit Instances**
+
+It’s often convenient to define “compound” functional unit instances as collections that include two or more “component” units. A compound unit includes all the capabilities of its component units. Each component can specify its own reservation queue size.
+
+
+```
+  cpu compound_units {
+    issue s0, s1, s2;
+    func_unit int_math<5>:load<6> alu1();
+    func_unit int_math<5>:store<3> alu2();
+    func_unit float_math<20>:branch<2> alu3();
+    func_unit misc<30> alu4();
+  }
+```
+
+This construct is similar to the “super-unit” concept in tablegen. Only one component of a compound functional unit can be used per cycle. In the above example, “alu3” is the only unit that supports floating point math or branches, so those operations can’t be issued in parallel. Similarly, you can issue two integer math operations in parallel, but only if you’re not also issuing a load or store.
+
+Currently, we don’t support specialization parameters on compound functional unit instances. However, you can define functional unit templates with base units, and this provides similar capability.
+
+
+##### **Associating a CPU Definition with an LLVM Subtarget**
+
+A CPU definition can be directly associated with one or more LLVM subtargets, for example:
+
+
+```
+  cpu SiFive7("sifive-7-series", "sifive-e76", "sifive-s76", "sifive-u74") { … }
+```
+
+At compile time, we can select which CPU definition to use based on normal target-selection command-line options.
+
+
+##### **Modeling Forwarding**
+
+Forwarding is modeled by describing a forwarding network between functional units. The fact that a “forwarding” network needs to be described at all implies that such networks aren’t fully connected or uniform.
+
+Grammar:
+
+
+```
+forward_stmt    : 'forward' IDENT '->'
+                      forward_to_unit (',' forward_to_unit)* ';' ;
+forward_to_unit : IDENT ('(' snumber ')')? ;
+```
+
+Example:
+
+
+```
+  forward my_alu -> my_adder(1), my_load(2);
+```
+
+In a forwarding specification, a unit name can be a functional unit instance name, a functional unit group name, or a functional unit template name. When using template or group names, all members of the group, or all instances of the specified template type, are implicitly referenced.
+
+For many processors, late functional unit assignment creates a phase-ordering problem in the compiler. Similarly, runtime functional unit assignment implies that we can’t necessarily know whether a value will be forwarded. Unless we know with certainty the functional unit assignments for two instructions, we can’t always tell if there is a forwarding path between them.
+
+This isn’t necessarily a problem for downstream analysis tools, which work with fully scheduled code where all the functional units may have been determined by the compiler. There are several cases that we handle separately:
+
+Case 1: Two instructions are both tied to specific functional units. In this case, we can fully determine whether forwarding is supported between the two functional units.
+
+Case 2: Two instructions are tied to two sets of functional units (set A and set B), and all functional units in A are forwarded to all functional units in B. In this case, we can also determine whether forwarding is supported between the two instructions. (We don’t attempt to manage this today.)
+
+Case 3: Same as Case 2, but not all members of A are forwarded to B. In this case, the compiler could, at best, use a probability of forwarding.
+ +Case 4: Same as Case 3, but there is no forwarding between A and B. + +Note that case 3 is quite common, and can be mitigated if the compiler uses a pass to pre-constrain the sets of functional units each instruction uses. This is quite common in compilers for clustered architectures - a pre-scheduling pass chooses a cluster for each instruction, which effectively constrains the functional units each instruction can run on, and often improves the chances for forwarding between instructions. + +Indeed the most common case currently modeled in tablegen files is a functional unit forwarding to itself or the superunit of itself, or a functional unit group forwarding to itself. + +In short, there are architectural cases that cannot be modeled precisely, and there are cases where we simply need a heuristic. We provide the hooks necessary for a compiler to provide the heuristic based on the existing model. + +Note: there is a philosophical question of whether we should provide best case or worst case latencies when the forwarding cannot be statically predicted. Generally, we believe that worst case latencies are better than best case latencies, simply because too-short latencies can produce code which occasionally (or always) stalls. On the other hand, overestimating the latency produces schedules where a pair of dependent instructions _tend _to be scheduled far enough apart to avoid stalls. In effect, schedulers will separate instructions by the requested latency only when there’s other useful work to do. Otherwise, there’s no reason to separate them - the stall is inevitable. + + +#### **Functional Unit Template Definitions** + +A functional unit template describes, abstractly, what operations can be performed on any instance of the unit, and how those operations use the template parameters - register classes and resource references. An abstract set of operations is represented by a subunit instance, which represents a set of instructions with similar behavior in terms of functional unit usage, resource usage, and register classes. Functional unit templates are defined in their own private namespace. + +Functional unit templates are similar to C++ templates in that each instantiation in CPU definitions creates a specialized instance of the functional unit based on the template parameters - register classes and resources. + +For superscalar processors, it's not necessary to specify explicit templates for each functional unit used in a CPU description. The MDL compiler instantiates these automatically depending on how the functional units are referenced in latency templates, tying functional units automatically to their associated subunits. (The implication of this is that implicitly defined templates cannot be parameterized.) + +A functional unit template has the following grammar: + + +``` + func_unit_template : 'func_unit' IDENT base_list + '(' func_unit_params? ')' + '{' func_unit_template_stmt* '}' ';'? ; + + func_unit_params : fu_decl_item (';' fu_decl_item)* ; + fu_decl_item : 'resource' name_list + | 'register_class' name_list ; + + func_unit_template_stmt : resource_def + | port_def + | connect_stmt + | subunit_instantiation ; + + port_def : 'port' port_decl (',' port_decl)* ';' ; + port_decl : IDENT ('<' IDENT '>')? ('(' resource_refs ')')? ; + connect_stmt : 'connect' IDENT + ('to' IDENT)? ('via' resource_refs)? ';' ; + + subunit_instantiation : (name_list ':')? subunit_statement + | name_list ':' '{' subunit_statement* '}' ';'? 
; + + subunit_statement : 'subunit' subunit_instance (',' subunit_instance)* ';' ; + subunit_instance : IDENT '(' resource_refs? ')' ; +``` + + +The general schema of a functional unit template looks like this: \ + + + +``` + func_unit a_1 [: ] () { + + + + … +} +``` + + + +##### **Simplest Functional Unit Template Definition** + +The simplest example of a functional unit template would define a functional unit that has no parameters, and implements a single subunit: + + **func_unit simple() { + + +``` + subunit xyzzy(); +} +``` + + +In this case, any instruction that is defined to use the subunit “xyzzy” can run on this functional unit. This template doesn’t impose any additional constraints on those instructions, and no shared resources are used. + + +##### **Defining Functional Unit Resources** + +A functional unit template can locally define resources which represent hardware resources tied to _each instance_ of the functional unit. These can be used to specialize subunit instances: + + **func_unit unit_with_local_resources() { + + +``` + resource my_resource; + resource my_pooled_resource[4]; + + subunit add(my_resource, my_pooled_resource[0..1]); + subunit subtract(my_resource, my_pooled_resource[2..3]); + subunit multiply(my_pooled_resource); +} +``` + + +In this example, the functional unit supports 3 classes of instructions (add, subtract, multiply), and passes slightly different local resources to each. Each instance of this functional unit has an independent set of resources (my\_resource, my\_pooled\_resource). + +Importantly: functional-unit-local resources which are used for multiple cycles can be used to model non-pipelined functional units - i.e.units which are reserved for some number of cycles. + + +##### **Defining “Port” Resources** + +A port is a resource type that explicitly binds a named register class with a resource reference. A port is used to specialize subunit instances, and adding functional-unit-specific register constraints on instructions associated with the subunit. + +A port definition has the general form: + + **'port' ('<' '>')? ('(' resource_ref ')')? ; + +When a port is tied to more than one resource, any references to that port refer to all of the associated resources. Some examples: + + **port port_a ; // port_a tied to GPR regs + + +``` + port port_b (res1); // port_b tied to LOW regs and res1 + port port_c (pool[0..4]); // port_c tied to pool[0..4] +``` + + +You can also use a “connect” statement to tie a port to register classes and resources: + + **'connect' 'to' 'via' resource_ref ; + +The following is equivalent to the above definition of “port\_b”: + + **port port_b; + + +``` +connect port_b to LOW via res1; +``` + + +This syntax could potentially be used to connect a port to more than one constraint/resource set, but this capability isn’t currently supported, and this syntax may be deprecated. + +Ports can be used to specialize subunit and latency instances, described in subsequent sections. + + +##### **Using Template Parameters** + +Resource parameters can be used exactly like locally defined resources to specialize subunit instances. Register class parameters are used to define ports. Resource parameters can refer to a single resource, a pool of resources, or a group of resources. + +Here is an example subunit instance: + + +``` + subunit adder(res, porta, res2, portc); +``` + + +The parameters refer to resources (or ports) defined in the functional unit, cluster, cpu, or globally. 
The resource parameters themselves can include constrained versions of the resources they refer to, in particular specifying a particular member or a subset of a pooled resource, for example: + + +``` + subunit load(pool1.member, pool2[5], pool3[2..4]); +``` + + +A simple example of a full functional unit template definition: + + **func_unit specialized(resource shared_pool; class regs) { + + +``` + resource my_resource; + port my_port (shared_pool[3..5]); + + subunit load(my_resource, my_port); + subunit store(my_port); +} +``` + + + +##### **Conditional Subunit Instances** + +In a functional unit template, a subunit instance can be conditionally instantiated based on a predicate. Predicates are simply names of the instantiating cpu definition and functional unit instance. This allows us to specialize a functional unit instance based on how its instantiated, for example: + + **cpu my_cpu { + + +``` + func_unit my_func xyzzy(); + func_unit my_func plugh(); + } +func_unit my_func() { + resource pooled_resource[4]; + xyzzy: subunit add(pooled_resource[0..1]); + plugh: subunit add(pooled_resource[2..3]); +} +``` + + + +##### **Using Base Functional Units** + +Functional units tend to get more capable over generations of a processor, so we’d like a way to derive functional units from other functional units. A functional unit template can be defined to have a base functional unit, for example: + + +``` + func_unit base_func() { … } + func_unit my_func : base_func() { … } +``` + + +In this example, the template “my\_func” simply includes the definition of “base\_func” in its definition. In effect, anything “base\_func” can do, “my\_func” can do. The base functional unit definition must have the same leading parameters as the derived functional unit definition. + +In effect, when you instantiate a based functional unit, you implicitly instantiate its bases and any subbases. This language feature allows us to easily extend functional unit definitions over processor generations. + + +##### **Defining functional unit groups** + +When defining a superscalar CPU, its generally not necessary to provide a functional unit template definition for each functional unit, since latency rules specify which functional units are used by a subunit. In this case, its helpful to be able to easily specify an arbitrary pool of functional units that can be used for an instruction. So the MDL has a way to do that. + + +``` + func_unit_group : 'func_group' IDENT ('<' number '>')? : name_list ; +``` + + +For example: + + +``` + func_group MyGroup<42> member1, member2, member3; +``` + + +This defines a functional unit group with 3 members, and a single input queue of length 42. These groups are used in latency rules to tie subunits to a pool of functional units. + + +#### **Subunit Template Definitions** + +Subunits are used to link sets of instruction definitions to their pipeline behaviors and candidate functional units. Subunits appear in three contexts: + + + +* Each subunit template has a definition. +* Functional unit templates instantiate subunits that they support. +* Instructions can declare which subunits they are associated with. 
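+
+As an illustration of how these three contexts relate, here is a minimal hypothetical sketch (the phase names E1/E2, the GPR operand type, and all other names are assumptions; in practice the instruction definition is normally generated by tdscan from the tablegen files):
+
+
+```
+  // A latency rule and a subunit template that instantiates it.
+  latency add_lat() { def(E2, $dst); use(E1, $src1); use(E1, $src2); }
+  subunit add() { latency add_lat(); }
+
+  // A functional unit template that supports the subunit.
+  func_unit my_alu() { subunit add(); }
+
+  // An instruction definition tied to the subunit.
+  instruction ADD(GPR dst, GPR src1, GPR src2) { subunit(add); }
+```
+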
\ + + +A subunit definition abstractly represents a set of instruction definitions that logically have the same behaviors: + + + +* When operands are read and written +* What resources are used/held/reserved +* What functional units they can issue on +* What issue slots and/or encoding bits they use +* What subtargets are supported + +An instruction - or set of instructions - may behave differently between subtargets, and/or functional units, and/or issue slots. Subunit templates are therefore parameterized so that their instances can be specialized for the contexts in which they are instantiated, and they can in turn specialize their associated latency instantiations. + +A subunit template definition has the following grammar: + + +``` + subunit_template : 'subunit' IDENT su_base_list + '(' su_decl_items? ')' + (('{' subunit_body* '}' ';'?) | + ('{{' latency_items* '}}' ';'? )) ; + + su_base_list : (':' (IDENT | STRING_LITERAL))* ; + su_decl_items : su_decl_item (';' su_decl_item)* ; + su_decl_item : 'resource' name_list + | 'port' name_list ; + subunit_body : latency_instance ; + latency_instance : (name_list ':')? latency_statement + | name_list ':' '{' latency_statement* '}' ';'? ; + latency_statement : 'latency' IDENT '(' resource_refs? ')' ';' ; +``` + + +A subunit template has the following general schema: + + +``` + subunit add () { + + + … + } +``` + + +Latency instance instances (in subunit templates) have the following general forms: + + **latency ( ); + + +``` + : latency ( ); + : { } +``` + + +The optional predicate is a comma-separated list of names which refers to the CPU or functional unit the current subunit is instantiated in. This allows subunits to specify different latencies depending on the CPU or functional unit they are instantiated from. This is similar to the support in functional unit templates for conditional subunit instances. For example: + + +``` + cpu1, cpu3 : latency xyzzy(port1, port2, etc); + alu7: latency plugh(resource1, resource2, etc); +``` + + +A subunit template can specify as many latency instances as needed - the resulting subunit is the union of all the valid latency templates. This allows you to separate different classes of behaviors into different latency templates. Since latency templates are also specialized, you can manage the separation in latencies. The typical practice is for a subunit to have a single latency instance. + + +##### Subunit Template Parameters + +Subunit template parameters can be a mix of ports and resources, and are used to specialize a subunit for the context in which it is instantiated, for example: + + +``` + subunit add (resource A, B; port C) { … } +``` + + +In general, these work exactly the same way functional unit templates are used. They can be used as latency parameters to specialize latency instances. + + +##### Tying Instructions to Subunits + +There are two ways to associate subunits to instructions: + + + +* Instructions can specify which subunits they can run on, or +* Subunits can specify which instructions they support. + +We discuss these two approaches below. + + +###### Subunits in Instructions + +Subunits are associated with instruction definitions to... + + + +* Define each of their possible pipeline behaviors +* Determine which functional units they can be issued on (if any!) 
+* To provide functional-unit-specific register constraints to operand registers +* To determine whether an instruction is valid for the selected architecture + +Each defined instruction must specify at least one subunit that it is bound to. This is done in tablegen by introduction of a Subunit attribute on each instruction (or instruction class) definition. + +We allow more than one subunit per instruction, which implies different instruction behaviors across CPUs or functional units. In general, this isn’t necessary, since a subunit can specify different behaviors for different functional units and/or CPUs. So this is strictly a stylistic choice. + + +###### Subunit Bases + +A subunit template definition can have one or more “bases”. A base is either the name of another subunit, or a string representing a regular expression of instruction names. Bases tie a subunit to sets of instructions, either directly by instruction name, or transitively through their base subunits. A subunit does not need to have the same parameters as its bases, and does not inherit any latency information from its bases. + +This example ties the “add” subunit to any instruction with “ADD” as a name prefix, and also to any instructions tied to the “base\_add” subunit. + + +``` + subunit add : "ADD*" : base_add() {...} +``` + + +Subunit bases provide an alternate way of tying instructions to subunits without modifying the instruction definitions (where each instruction can tie itself to a set of subunits). This effectively allows a single “base” subunit - and all of its associated instructions - to have different latency behaviors for each target. + + +##### Shorthand Subunit Template Definitions + +Often a subunit template simply specifies a single latency template instance, and the latency template may only be used in a single subunit template. In that case, we have a shorthand that combines the latency template into the subunit template. For example: + + +``` +subunit load(resource a, b, c) { + latency load(resource a, b, c); +} +latency load(resource a, b, c) { def(E1, $dst); use(E1, $src); … } +``` + + +Can be alternatively expressed as: + + +``` +subunit load(resource a, b, c) {{ + def(E1, $dst); use(E1, $src); … +}} +``` + + + +#### **Latency Template Definitions** + +A latency template specifies the detailed pipeline behavior for a class of instructions. The class of “client” instructions for a latency template is the set of instructions that use any subunit that instantiates the latency template. + +Latency templates are specialized for the exact context they are instantiated in - so they are statically polymorphic: a single latency template instantiated in many contexts can describe many different behavior sets for a single instruction depending on the CPU, the functional unit instance, subunit instance, the latency instance, and the instruction itself. + +Latency templates: + + + +* describe what happens at each stage of the execution pipeline in terms of register operands and resources used and reserved. +* optionally imposes additional functional-unit-specific constraints on register operands. + +A latency template definition has the following general schema: + + `latency : base_latencies ( ) {` + + +``` + + + … + } +``` + + +Latency templates can be derived from other latencies, and take resources or ports as parameters. The body of the template is simply a set of latency references. + +The full grammar: + + +``` + latency_template : 'latency' IDENT base_list + '(' su_decl_items? 
')' + '{' latency_items* '}' ';'? ; + latency_items : latency_refs + | micro_ops_statement ; + latency_refs : (name_list ':')? + (latency_item | ('{' latency_item* '}' ';'?)) ; + latency_item : latency_ref + | conditional_ref + | fus_statement ; + + conditional_ref : 'if' IDENT '{' latency_item* '}' + (conditional_elseif | conditional_else)? ; + conditional_elseif : 'else' 'if' IDENT '{' latency_item* '}' + (conditional_elseif | conditional_else)? ; + conditional_else : 'else' '{' latency_item* '}' ; + + latency_ref : ref_type '(' latency_spec ')' ';' ; + ref_type : ('use' | 'def' | 'usedef' | 'kill' | + 'hold' | 'res' | 'predicate' | 'fus') ; + latency_spec : expr (':' number)? ',' latency_resource_refs + | expr ('[' number (',' number)? ']')? ',' operand + | expr ',' operand ',' latency_resource_refs ; + expr : '-' negate=expr + | expr ('*' | '/') expr + | expr ('+' | '-') expr + | '{' expr '}' + | '(' expr ')' + | IDENT + | number + | operand ; + + fus_statement : 'fus' '(' (fus_item ('&' fus_item)* ',')? + snumber (',' fus_attribute)* ')' ';' + + fus_item : IDENT ('<' (expr ':')? number '>')? ; + fus_attribute : 'BeginGroup' | 'EndGroup' | 'SingleIssue' + | 'RetireOOO' ; + + latency_resource_refs : latency_resource_ref (',' latency_resource_ref)* ; + latency_resource_ref : resource_ref ':' number (':' IDENT)? + | resource_ref ':' IDENT (':' IDENT)? + | resource_ref ':' ':' IDENT // no allocation + | resource_ref ':' '*' // allocate all + | resource_ref ; + operand : (IDENT ':')? '$' IDENT ('.' operand_ref)* + | (IDENT':')? '$' number + | (IDENT':')? '$$' number + + operand_ref : (IDENT | number) ; +``` + + + +##### **Derived Latency Templates** + +A latency template can be derived from one or more base latency templates. Any hierarchy is allowed (except recursive), as long as the base template has the exact same leading parameters as the derived latency. A base latency can be included more than once in the hierarchy - this doesn’t matter, since all occurrences of that base are identical (so duplicates are ignored): + + +``` + latency base1 (resource a) { … } + latency base2 (resource a, b) { … } + latency base3 : base1 (resource a) { … } + latency my_latency : base2 : base3(resource a, b, c) { … } +``` + + +In this example, my\_latency includes base1, base2, and base3. Deriving latency templates is a fairly common pattern: instruction classes often share _some_ behaviors, but not all. So those shared behaviors can be put in a base latency template. A common example is an instruction predicate, perhaps shared by all instructions. + + +##### **Latency References** + +A latency reference statement describes a single operand reference and/or resource references in a specified pipeline stage. It references instruction operands _by name, _as well as resource and port parameters, and ties the operations to named pipeline phases. + +Latency references have the following general form: + + +``` + (, , ); +``` + + +where either the operand specifier or ports/resources may be omitted. A single reference statement asserts that an operand and resources are referenced in a specific pipeline phase for any instruction that this rule could apply to, ie: _any instruction that uses a subunit that instantiates this latency template._ Each aspect of a latency reference are described below. 
+ + +###### **Operand and resource latency operators:** + +There are 6 basic operator types in a latency reference: + + + +* use - read a register, and/or use a resource +* def - write a register, and optional use of a resource +* predicate - specifies which register operand is an instruction predicate +* reserve - reserve a resource until a specific pipeline stage. +* hold - hold issue until a resource is available for reservation. +* fus - reserve a functional unit for a specified number of cycles, and/or a specified number of micro-ops needed for the instruction. + +There are 3 additional operator types which are primarily used as shortcuts (these are currently parsed, but unimplemented in the llvm integration): + + + +* usedef - a use and a def of an operand (a shorthand syntax) +* kill - the register value is wiped and no value is defined (typically used in call instructions) +* or - this is essentially a conditional def, but the instruction has no explicit predicate (useful for status-setting instructions). + + +###### **Phase Expressions** + +The phase expression specifies the pipeline phase that the operation occurs in. The expression can refer directly to a defined phase name, or an expression based on a phase name: + + +``` + use(E7, $operand, res); // use operand and res in cycle E7 + use(E7+5, $operand, res); // use operand and res in cycle E7+5 +``` + + +An instruction may perform a reference at a cycle which is a function of immediate operands of the instruction instance. For example: + + +``` + use(E1 + $width - 12, $operand, res); +``` + + +where “$width” is an immediate instruction operand. Its value is fetched from the instruction instance and used in the expression. As with any operand specifier, if the client instruction doesn’t have an immediate operand named “width”, the rule is ignored for that instruction. + +Phase expressions have a limited set of operators: +, -, \*, /, (). Since latencies must be positive integers, we also provide a “floor” operator which converts negative expressions to 0. Simply enclose the expression in curly braces ({...}). + + +###### **Operand Specifiers** + +The operand specifier has the same grammar as in tablegen, which allows you to specify an optional operand type, the operand name, and optional sub-operand names: + + +``` + operand : (IDENT ':')? '$' IDENT ('.' operand_ref)* + | (IDENT':')? '$' number + | (IDENT':')? '$$' number + + operand_ref : (IDENT | number) ; +``` + + +Operand specifiers act as predicates for the validity of a reference for a particular instruction. Some examples: + + +``` + GPR:$dst // an operand named "dst" with operand type GPR + ADR:$dst // an operand named "dst" with operand type ADR + $dst // an operand named "dst", with any operand type +``` + + +` opnd:$src.reg // an operand named "src", type "opnd", suboperand "reg"` + +Because a latency could be specialized for many instructions which have different sets of operands, the operand specifier acts as a predicate for the application of a reference to a particular instruction. When the operand isn’t present in a client instruction, the latency reference is ignored for that instruction. For example, you can differentiate on operand type: + + +``` + def(E5, GPR:$dst); + def(E7, FPR:$dst); +``` + + +In this example, instructions with a GPR dst operand write their results in cycle E5, while instructions with an FPR dst operand write their results in cycle E7. 
+
+Or you can differentiate based on the operand name:
+
+
+```
+  use(E2, $src1);    // most instructions have at least one src opnd
+  use(E3, $src2);    // some instructions have 2 source operands
+  use(E4, $src3);    // and some instructions have 3!
+```
+
+Note that operands _can_ be referenced by their index in an instruction’s operand list, but this is error-prone and isn’t considered best practice because we can’t thoroughly check the validity of the index. The syntax is simply “$” followed by the operand index (for example, “$2”). Note that sub-operands often aren’t given names in tablegen, and must be referenced by index, for example: $src.1. Unnamed variant operands (obviously) don’t have names, and are referenced by their position past the end of the operands defined for an instruction, i.e. “$$1”, “$$2”, etc.
+
+
+###### **Resource References**
+
+Any latency reference can include an optional set of resource references. These have slightly different semantics depending on the operator type (def/use/predicate/hold/reserve).
+
+For “use”, “def”, and “predicate” statements, a set of resource references can be specified that are associated with the operand reference. As with all latency references, the operand must match an operand of the client instruction. If the reference is valid, the resource is “used” - for any of these operators - at the pipeline phase specified, unless the resource was defined with a specific phase. The “use” of a resource is equivalent to a single-cycle hold/reserve of that resource. Some examples:
+
+
+```
+  use(E1, $src, my_res);     // use "my_res" at cycle E1
+  def(E32, $dst, my_res);    // use "my_res" at cycle E32
+```
+
+For “hold” and “reserve” operations, the operand specifier is optional, and if present serves _only_ as a predicate that indicates whether the reference is valid or not. However, at least one resource reference is required for these statements. A few examples:
+
+
+```
+  hold(E1, my_res);          // hold issue at E1 until resources are available
+  res(E32, $dst, my_res);    // reserve resources up to cycle E32
+```
+
+
+##### **Conditional References**
+
+Any reference in a latency rule can be conditional, using a predicate identifier. The predicates are generally identical to LLVM predicates, and check an attribute of a client instruction.
+
+Conditional references can be nested, for arbitrarily complex references. These have the following general form:
+
+
+```
+if <predicate> { <latency items> }
+else if <predicate> { <latency items> }
+else { <latency items> }
+```
+
+
+##### **Functional Unit and Micro-op References**
+
+A latency rule can directly specify a set of functional units and how long they are used, as well as specifying the number of micro-ops required for the operation. Each functional unit can optionally specify a pipeline “StartAt” cycle, which by default is the first execution phase.
+
+
+```
+  fus(13);                     // Instruction has 13 micro-operations.
+  fus(ALU, 2);                 // use ALU for 1 cycle, 2 micro-operations.
+  fus(ALU<3>, 1);              // use ALU for 3 cycles, 1 micro-operation.
+  fus(ALU<E5:4>, 1);           // use ALU starting at E5 for 4 cycles.
+  fus(ALU1<12>&ALU2&LOAD, 1);  // use ALU1, ALU2, and LOAD
+```
+
+These statements allow a latency rule (or subunit) to tie a set of instructions to functional unit instances. When there is more than one instance of the specified unit, or if the unit is declared as a functional unit group, one of those units is selected at compile time. Likewise, if the unit is a sub-unit (component) of one or more compound functional units, one of the “parent” functional units is selected.
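+
+Putting these pieces together, here is a hypothetical latency template for a load-like class of instructions, combining operand references, resource holds/reservations, and a functional unit reference (the phase names, the mem_port resource, the LSU unit, and the operand names are all assumptions for illustration):
+
+
+```
+  latency load_lat(resource mem_port) {
+    use(E1, $addr);        // read the address register in phase E1
+    def(E3, GPR:$dst);     // write the result in phase E3
+    hold(E1, mem_port);    // hold issue until the port is available
+    res(E2, mem_port);     // then reserve the port through phase E2
+    fus(LSU<2>, 1);        // use an LSU unit for 2 cycles, 1 micro-op
+  }
+```
+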
+
+### **Machine Description Compiler Artifacts**
+
+What does all of this produce?
+
+The primary artifact of the MDL compiler is a set of data that we associate with each instruction description in a targeted compiler. For each instruction, at compiler-build time we produce a list of objects, each of which describes that instruction’s behavior on a single functional unit instance. The instruction will have one of these objects for each functional unit instance that it can be scheduled on, across all CPUs. These are written out as a set of auto-initialized collections of objects that are attached to instruction templates in the target compiler.
+
+Each of these objects describes the behavior of an instruction cycle by cycle:
+
+
+* Which operands’ registers it reads and writes.
+* What register constraints are applied to operands.
+* What resources it uses, holds, or reserves.
+* Which functional units and issue slots it uses.
+* What pooled resources need to be allocated.
+
+The other primary artifact is a set of objects and methods for managing the low-level details of instruction scheduling and register allocation. This includes methods to build and manage resource pools, pipeline models, resource reservation infrastructure, and instruction bundling, all specialized for the input machine description.
+
+As part of this effort, we will incrementally modify the LLVM compiler to optionally use this information alongside the SchedMachineModel and Itinerary methodologies.
+


+ + +![alt_text](images/image1.png "image_tooltip") + + + +### **Using the MDL Language in LLVM** + +The proposed use case for the MDL language is as an alternative specification for the architecture description currently embodied in TableGen Schedules and Itineraries, particularly for architectures for which Schedules and Itineraries are not expressive enough. It is explicitly _not_ the intent that it “replace TableGen”. But we believe that the MDL language is a better language (vs Schedules and Itineraries) for a large class of accelerators, and can be used effectively alongside TableGen. + +We’ve written a tool (TdScan) which extracts enough information from TableGen descriptions so that we can sync instruction definitions with architecture definitions. TdScan can also optionally scrape all of the Schedule and Itinerary information from a tablegen description and produce an equivalent\*\* MDL description. + +So there are several possible MDL usage scenarios: + + + +* _Current: _Given a complete tablegen description with schedules or itineraries, scrape the architecture information and create an MDL description of the architecture every time you build the compiler. +* _Transitional: _Scrape an existing tablegen description and keep the generated MDL file, using it as the architecture description going forward. +* _Future (potentially): _when writing a compiler for a new architecture, write an MDL description rather than schedules and/or itineraries. + +The general development flow of using an MDL description in LLVM looks like this: + + + +1. Write an architecture description (or scrape one from an existing tablegen description). + 1. Instructions, operands, register descriptions in .td files + 2. Microarchitecture description in .mdl files +2. Compile TD files with TableGen +3. Use TdScan to scrape instruction, operand, and register information from tablegen, producing a .mdl file +4. Compile the top-level MDL file (which includes the scraped Tablegen information). This produces C++ code for inclusion in llvm. +5. Build LLVM. + + + +
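+
+For example, the hand-written top-level MDL file for a hypothetical target might look like the following sketch (all names are illustrative), with the tdscan-generated instruction information pulled in via an import:
+
+
+```
+  family MyTarget;
+  import "MyTarget_instructions.mdl"    // instruction info generated by tdscan
+
+  protected phases MyTarget { F1, E[1..4] };
+
+  cpu generic("mytarget-generic") {
+    func_unit my_alu alu();             // instance of a "my_alu" template
+  }
+```
+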


+
+
+![alt_text](images/image2.png "image_tooltip")
+
+
+#### **TdScan**
+
+To synchronize an MDL architecture description with LLVM TableGen descriptions, we’ve written a tool which scrapes the information that the MDL compiler needs from Tablegen files. In the general case, it collects basic information about registers, register classes, operands, and instruction definitions, and produces an “mdl” file which can be processed by the MDL compiler to sync an architecture description to the tablegen descriptions of instructions.
+
+For currently upstreamed targets that use Schedules or Itineraries, TdScan can also extract the whole architecture specification from the tablegen files and produce an MDL description of the architecture. We’ve used this approach to prove out our LLVM integration with upstreamed targets. The integration and testing of this is ongoing.
+
+
+#### **Upstream Targets**
+
+In general, upstream targets have no compelling need for MDL descriptions - the existing Schedules and/or Itinerary descriptions are field tested. However, there are a few benefits to using an MDL description for existing targets. The primary benefit is that MDL descriptions are typically quite a bit smaller, more succinct, and (we believe) more intuitive than the equivalent TableGen descriptions.
+
+| CPU     | MDL Lines of Code | Tablegen Lines of Code |
+| ------- | ----------------- | ---------------------- |
+| AArch64 | 2866              | 21429                  |
+| AMDGPU  | 299               | 388                    |
+| ARM     | 3380              | 9371                   |
+| Hexagon | 1947              | 17625                  |
+| Lanai   | 87                | 69                     |
+| Mips    | 472               | 3003                   |
+| PowerPC | 4251              | 4276                   |
+| RISCV   | 123               | 2909                   |
+| Sparc   | 237               | 123                    |
+| SystemZ | 1638              | 9224                   |
+| X86     | 5686              | 25631                  |
+ + +\*\* Note: these numbers are generated using “index-based” references in subunit/latency rules, rather than symbolic references. These are typically 10-20% less lines of MDL description than when operand names are used, almost entirely due to operand name differences between instruction definitions (like “dest” vs “dst”, or “src1” vs “s1”). However, the databases produced by the two approaches are virtually identical - albeit ordered differently. + + +#### **Syncing Instruction Information** + +The MDL compiler needs 3 pieces of information from tablegen for each machine instruction: + + + +1. The instruction opcode name +2. Each operand’s name, type, and order of appearance in an instruction instance +3. The name(s) of the subunit(s) it can run on. + +Subunits are a new concept introduced with the MDL. The normal approach is to modify each tablegen instruction description to explicitly specify subunit assignments, which become an additional instruction attribute. The other approach is to use subunit template bases to use regular expressions to tie instructions to subunits (just like InstRW records). + +As part of the build process, we use a program (“tdscan”) which scrapes the instruction information - including the subunit information from a target’s tablegen files and generates information about the target’s instructions. Tdscan allows us to stay in sync with changes to instruction definitions. + + +#### **Using the generated microarchitecture information in LLVM** + +There are two classes of services that the MDL database and associated APIs provide: + + + +* Detailed pipeline modeling for instructions (for all processors, for all functional units) including instruction latencies calculations and resource usage (hazard management) +* Parallel instruction bundling and instruction scheduling. + +The tablegen scraper (tdscan) can correctly scan all upstreamed targets and generate correct instruction, operand, and register class information for all of them. + +We can also extract high-level architecture information and generate correct MDL descriptions for all the upstreamed targets that have Schedules or Itineraries (AArch64, AMDGPU, AMD/R600, ARM, Hexagon, Lanai, Mips, PPC, RISCV, Sparc, SystemZ, X86). Usually, the new architecture spec is dramatically simpler than the tablegen descriptions. + +We provide code and libraries to do the following things - in machine-independent ways: + + + +* Calculate accurate instruction latencies. +* A set of APIs to build and manage instruction bundles (parallel instructions), performing all the required legality checks and resource allocation based on information in the generated database. +* Manage resource reservations and hazard management for an instruction scheduler. +* Determine latencies between instructions based on resource holds and reservations. +* Methods to query functional unit, issue slot, and resource assignments for a bundled/scheduled instruction. +* Methods to query the set of all register uses and defs for an instruction instance, with accurate timing information. +* Manage functional unit register forwarding. + +There’s more we can do here, and a deeper integration with upstreamed LLVM is a long-term goal. + + +#### **Current Status of the LLVM Integration (briefly)** + + + +* We can generate MDL full architecture specs for all upstreamed targets, and properly represent and use all metadata associated with Schedules and Itineraries. +* The MDL database is used to properly calculate instruction latencies for all architectures. 
Caveat: we don’t yet fully convert Itinerary and Schedule forwarding information, since the LLVM model for forwarding is fundamentally different from the MDL model, and the provided information is typically incomplete. +* We’ve integrated the MDL-based bundle-packing and hazard management into all the LLVM schedulers, with the exception of the Swing scheduler, which is still in progress. We’ve run all the standard tests, and in most cases we produce the same schedules, with positive and negative performance differences in the noise. + + + + +### **Appendix A: Full Language Grammar** + +The definitive Antlr4-based grammar is in llvm/utils/MdlCompiler/mdl.g4. + + +``` +architecture_spec : architecture_item+ EOF ; +architecture_item : family_name + | cpu_def + | register_def + | register_class + | resource_def + | pipe_def + | func_unit_template + | func_unit_group + | subunit_template + | latency_template + | instruction_def + | operand_def + | derived_operand_def + | import_file + | predicate_def ; + +import_file : 'import' STRING ; + +family_name : 'family' IDENT ';' ; + +//--------------------------------------------------------------------------- +// Top-level CPU instantiation. +//--------------------------------------------------------------------------- +cpu_def : 'cpu' IDENT ('(' STRING (',' STRING)* ')')? + '{' cpu_stmt* '}' ';'? ; + +cpu_stmt : pipe_def + | resource_def + | reorder_buffer_def + | issue_statement + | cluster_instantiation + | func_unit_instantiation + | forward_stmt ; + +cluster_instantiation : 'cluster' IDENT '{' cluster_stmt+ '}' ';'? ; + +cluster_stmt : resource_def + | issue_statement + | func_unit_instantiation + | forward_stmt ; + +issue_statement : 'issue' '(' IDENT ')' name_list ';' ; + +func_unit_instantiation : 'func_unit' func_unit_instance func_unit_bases* + IDENT '(' resource_refs? ')' + ('->' (pin_one | pin_any | pin_all))? ';' + +func_unit_instance : IDENT ('<>' | ('<' number '>'))? +func_unit_bases : ':' func_unit_instance + +pin_one : IDENT ; +pin_any : IDENT ('|' IDENT)+ ; +pin_all : IDENT ('&' IDENT)+ ; + +//--------------------------------------------------------------------------- +// A single forwarding specification (in CPUs and Clusters). +//--------------------------------------------------------------------------- +forward_stmt : 'forward' IDENT '->' + forward_to_unit (',' forward_to_unit)* ';' ; +forward_to_unit : IDENT ('(' snumber ')')? ; + +//--------------------------------------------------------------------------- +// Functional unit template definition. +//--------------------------------------------------------------------------- +func_unit_template : 'func_unit' IDENT base_list + '(' func_unit_params? ')' + '{' func_unit_template_stmt* '}' ';'? ; + +func_unit_params : fu_decl_item (';' fu_decl_item)* ; +fu_decl_item : 'resource' name_list + | 'register_class' name_list ; + +func_unit_template_stmt : resource_def + | port_def + | connect_stmt + | subunit_instantiation ; + +port_def : 'port' port_decl (',' port_decl)* ';' ; +port_decl : IDENT ('<' IDENT '>')? ('(' resource_ref ')')? ; +connect_stmt : 'connect' IDENT + ('to' IDENT)? ('via' resource_ref)? ';' ; + +//--------------------------------------------------------------------------- +// Functional unit group definition. 
+//--------------------------------------------------------------------------- +func_unit_group : FUNCGROUP IDENT ':' name_list ';' ; + +//--------------------------------------------------------------------------- +// Definition of subunit template instantiation. +//--------------------------------------------------------------------------- +subunit_instantiation : (name_list ':')? subunit_statement + | name_list ':' '{' subunit_statement* '}' ';'? ; + +subunit_statement : 'subunit' subunit_instance (',' subunit_instance)* ';' ; +subunit_instance : IDENT '(' resource_refs? ')' ; + +//--------------------------------------------------------------------------- +// Definition of subunit template definition. +//--------------------------------------------------------------------------- +subunit_template : 'subunit' IDENT su_base_list '(' su_decl_items? ')' + (('{' subunit_body* '}' ';'?) | + ('{{' latency_items* '}}' ';'? )) ; + +su_decl_items : su_decl_item (';' su_decl_item)* ; +su_decl_item : 'resource' name_list + | 'port' name_list ; + +su_base_list : (':' (IDENT | STRING_LITERAL))* ; + +subunit_body : latency_instance ; +latency_instance : (name_list ':')? latency_statement + | name_list ':' '{' latency_statement* '}' ';'? ; +latency_statement : 'latency' IDENT '(' resource_refs? ')' ';' ; + +//--------------------------------------------------------------------------- +// Latency template definition. +//--------------------------------------------------------------------------- +latency_template : 'latency' IDENT base_list + '(' su_decl_items? ')' + '{' latency_items* '}' ';'? ; + +latency_items : (name_list ':')? + (latency_item | ('{' latency_item* '}' ';'?)) ; + +latency_item : latency_ref + | conditional_ref + | fus_statement ; + +//--------------------------------------------------------------------------- +// Conditional references +//--------------------------------------------------------------------------- +conditional_ref : 'if' IDENT '{' latency_item* '}' + (conditional_elseif | conditional_else)? ; +conditional_elseif : 'else' 'if' IDENT '{' latency_item* '}' + (conditional_elseif | conditional_else)? ; +conditional_else : 'else' '{' latency_item* '}' ; + +//--------------------------------------------------------------------------- +// Basic references +//--------------------------------------------------------------------------- +latency_ref : ref_type '(' latency_spec ')' ';' ; + +ref_type : ('use' | 'def' | 'usedef' | 'kill' | + 'hold' | 'res' | 'predicate') ; + +latency_spec : expr (':' number)? ',' latency_resource_refs + | expr ('[' number (',' number)? ']')? ',' operand + | expr ',' operand ',' latency_resource_refs ; + +expr : '-' expr + | expr ('*' | '/') expr + | expr ('+' | '-') expr + | '{' expr '}' + | '(' expr ')' + | IDENT + | number + | operand ; + +//--------------------------------------------------------------------------- +// Shorthand for a reference that uses functional units. +//--------------------------------------------------------------------------- +fus_statement : 'fus' '(' (fus_item ('&' fus_item)* ',')? + snumber (',' fus_attribute)* ')' ';' + ; +fus_item : IDENT ('<' (expr ':')? number '>')? 
; + +fus_attribute : 'BeginGroup' | 'EndGroup' | 'SingleIssue' | 'RetireOOO' ; + +//--------------------------------------------------------------------------- +// Latency resource references +//--------------------------------------------------------------------------- +latency_resource_refs : latency_resource_ref (',' latency_resource_ref)* ; + +latency_resource_ref : resource_ref ':' number (':' IDENT)? + | resource_ref ':' IDENT (':' IDENT)? + | resource_ref ':' ':' IDENT // no allocation + | resource_ref ':' '*' // allocate all members + | resource_ref ; + +operand : (IDENT ':')? '$' IDENT ('.' operand_ref)* + | (IDENT ':')? '$' number + | (IDENT ':')? '$$' number + +operand_ref : (IDENT | number) ; + +//--------------------------------------------------------------------------- +// Pipeline phase names definitions. +//--------------------------------------------------------------------------- +pipe_def : protection? 'phases' IDENT '{' pipe_phases '}' ';'? ; +protection : 'protected' | 'unprotected' | 'hard' ; +pipe_phases : phase_id (',' phase_id)* ; +phase_id : '#'? IDENT ('[' range ']')? ('=' number)? ; + +//--------------------------------------------------------------------------- +// Resource definitions: global in scope, CPU- or Datapath- or FU-level. +//--------------------------------------------------------------------------- +resource_def : 'resource' ( '(' IDENT ('..' IDENT)? ')' )? + resource_decl (',' resource_decl)* ';' ; + +resource_decl : IDENT (':' number)? ('[' number ']')? + | IDENT (':' number)? '{' name_list '}' + | IDENT (':' number)? '{' group_list '}' ; + +resource_refs : resource_ref (',' resource_ref)* ; + +resource_ref : IDENT ('[' range ']')? + | IDENT '.' IDENT + | IDENT '[' number ']' + | IDENT ('|' IDENT)+ + | IDENT ('&' IDENT)+ ; + +//--------------------------------------------------------------------------- +// List of identifiers. +//--------------------------------------------------------------------------- +name_list : IDENT (',' IDENT)* ; +group_list : IDENT ('|' IDENT)+ + | IDENT ('&' IDENT)+ ; + +//--------------------------------------------------------------------------- +// List of template bases +//--------------------------------------------------------------------------- +base_list : (':' IDENT)* ; + +//--------------------------------------------------------------------------- +// Register definitions. +//--------------------------------------------------------------------------- +register_def : 'register' register_decl (',' register_decl)* ';' ; +register_decl : IDENT ('[' range ']')? ; + +register_class : 'register_class' IDENT + '{' register_decl (',' register_decl)* '}' ';'? + | 'register_class' IDENT '{' '}' ';'? ; + +//--------------------------------------------------------------------------- +// Instruction definition. +//--------------------------------------------------------------------------- +instruction_def : 'instruction' IDENT + '(' (operand_decl (',' operand_decl)*)? ')' + '{' + ('subunit' '(' name_list ')' ';' )? + ('derived' '(' name_list ')' ';' )? + '}' ';'? ; + +//--------------------------------------------------------------------------- +// Operand definition. +//--------------------------------------------------------------------------- +operand_def : 'operand' IDENT + '(' (operand_decl (',' operand_decl)*)? ')' + '{' (operand_type | operand_attribute)* '}' ';'? + ; +operand_decl : ((IDENT (IDENT)?) | '...') ('(I)' | '(O)')? ; + +operand_type : 'type' '(' IDENT ')' ';' ; + +operand_attribute : (name_list ':')? 
operand_attribute_stmt + | name_list ':' '{' operand_attribute_stmt* '}' ';'? ; +operand_attribute_stmt : 'attribute' IDENT '=' (snumber | tuple) + ('if' ('lit' | 'address' | 'label') + ('[' pred_value (',' pred_value)* ']' )? )? ';' ; +pred_value : snumber + | snumber '..' snumber + | '{' number '}' ; + +//--------------------------------------------------------------------------- +// Derived Operand definition. +//--------------------------------------------------------------------------- +derived_operand_def : 'operand' IDENT base_list ('(' ')')? + '{' (operand_type | operand_attribute)* '}' ';'? ; + +//--------------------------------------------------------------------------- +// Predicate definition. +//--------------------------------------------------------------------------- +predicate_def : 'predicate' IDENT ':' predicate_op? ';' ; + +predicate_op : pred_opcode '<' pred_opnd (',' pred_opnd)* ','? '>' + | code_escape + | IDENT ; +code_escape : '[' '{' .*? '}' ']' ; + +pred_opnd : IDENT + | snumber + | STRING_LITERAL + | '[' IDENT (',' IDENT)* ']' + | predicate_op + | operand ; + +pred_opcode : 'CheckAny' | 'CheckAll' | 'CheckNot' | 'CheckOpcode' + | 'CheckIsRegOperand' | 'CheckRegOperand' + | 'CheckSameRegOperand' | 'CheckNumOperands' + | 'CheckIsImmOperand' | 'CheckImmOperand' + | 'CheckZeroOperand' | 'CheckInvalidRegOperand' + | 'CheckFunctionPredicate' | 'CheckFunctionPredicateWithTII' + | 'TIIPredicate' + | 'OpcodeSwitchStatement' | 'OpcodeSwitchCase' + | 'ReturnStatement' | 'MCSchedPredicate' ; + +//--------------------------------------------------------------------------- +// Match and convert a number, a set of numbers, and a range of numbers. +//--------------------------------------------------------------------------- +number : NUMBER ; +snumber : NUMBER | '-' NUMBER ; +tuple : '[' snumber (',' snumber)* ']' ; +range : number '..' number ; +``` + + + +### **Appendix B: Future Directions** + + +#### **Memory Hierarchy** + +We need a first class representation of any compiler-managed memory hierarchy. + +Compiler-managed memory + + + +* Per level + * Size + * Addressable units + * Speed + * Latency + * Access method(s) + * Banking + * Sharing +* Separate address spaces + * Code, Data, I/O, etc + +Caches + + + +* Per level + * Size + * Type (I, D, I/D) + * Replacement policy + * Mapping (direct, associativity) + * Line size + * Prefetching + * Miss cost modeling + * etc + +Synchronization policies + +Virtual Memory + +DMA system descriptions + + +#### **Multi-Processor System Topology** + + +### **Appendix C: RISC-V Generated Architecture Description** + +This is a complete, automatically generated machine description for RISC-V using our tool to scrape information from tablegen files. We can automatically generate MDL specifications for all targets that have schedules and/or itineraries. We include RISC-V here for illustrative purposes. + +The “Schedule” td files for RISC-V are approximately 1720 lines of tablegen, describing two full schedule models and one “default” model. The generated MDL file is ~120 lines of our machine description language. + + +``` +//--------------------------------------------------------------------- +// This file is autogenerated from an LLVM Target Description File. 
+//--------------------------------------------------------------------- +import "RISCV_instructions.mdl" + +//--------------------------------------------------------------------- +// Pipeline phase definitions +//--------------------------------------------------------------------- +protected phases RISCV { F1, E[1..57] }; + +//--------------------------------------------------------------------- +// CPU Description Classes (4 entries) +//--------------------------------------------------------------------- +cpu RISCV("generic", "generic-rv32", "generic-rv64") { +} + +cpu Rocket("rocket", "rocket-rv32", "rocket-rv64", "sifive-e20", "sifive-e21", "sifive-e24", "sifive-e31", "sifive-e34", "sifive-s21", "sifive-s51", "sifive-s54", "sifive-u54") { + protected phases defaults { LOAD_PHASE=3 }; + issue(F1) s0; + func_unit RocketUnitALU<0> U0(); + func_unit RocketUnitB<0> U1(); + func_unit RocketUnitFPALU<0> U2(); + func_unit RocketUnitFPDivSqrt<1> U3(); + func_unit RocketUnitIDiv<1> U4(); + func_unit RocketUnitIMul<0> U5(); + func_unit RocketUnitMem<0> U6(); +} + +cpu SiFive7("sifive-7-series", "sifive-e76", "sifive-s76", "sifive-u74") { + protected phases defaults { LOAD_PHASE=3 }; + issue(F1) s0, s1; + func_unit SiFive7PipeA<0> U0(); + func_unit SiFive7PipeB<0>:SiFive7FDiv<1>:SiFive7IDiv<1> U1(); +} + +cpu SyntacoreSCR1("syntacore-scr1-base", "syntacore-scr1-max") { + protected phases defaults { LOAD_PHASE=2 }; + issue(F1) s0; + func_unit SCR1_ALU<0> U0(); + func_unit SCR1_CFU<0> U1(); + func_unit SCR1_DIV<0> U2(); + func_unit SCR1_LSU<0> U3(); + func_unit SCR1_MUL<0> U4(); +} + +//--------------------------------------------------------------------- +// Functional Unit Groups +//--------------------------------------------------------------------- +func_group SiFive7PipeAB: SiFive7PipeA, SiFive7PipeB; + +//--------------------------------------------------------------------- +// Subunit Definitions (58 entries) +//--------------------------------------------------------------------- +subunit sub6() {{ def(E1, $0); fus(1); fus(Rocket, 0); }} +subunit sub7() {{ def(E1, $0); fus(1); fus(SiFive7, 0); }} +subunit sub8() {{ def(E1, $0); fus(1); fus(SyntacoreSCR1, 0); }} +subunit sub49() {{ def(E1, $0); fus(RocketUnitALU, 1); fus(RocketUnitB, 1); }} +subunit sub0() {{ def(E1, $0); fus(RocketUnitALU, 1); }} +subunit sub41() {{ def(E1, $0); fus(RocketUnitB, 1); }} +subunit sub56() {{ def(E1, $0); fus(RocketUnitMem, 1); }} +subunit sub51() {{ def(E1, $0); fus(SCR1_ALU, 1); fus(SCR1_CFU, 1); }} +subunit sub2() {{ def(E1, $0); fus(SCR1_ALU, 1); }} +subunit sub42() {{ def(E1, $0); fus(SCR1_CFU, 1); }} +subunit sub5() {{ def(E1, $0); fus(SCR1_LSU, 1); }} +subunit sub45() {{ def(E1, $0); fus(SCR1_MUL, 1); }} +subunit sub57() {{ def(E1, $0); fus(SiFive7PipeA, 1); }} +subunit sub12() {{ def(E1, $0); fus(SiFive7PipeB, 1); }} +subunit sub46() {{ def(E1, $1); fus(RocketUnitALU, 1); fus(RocketUnitB, 1); }} +subunit sub17() {{ def(E1, $1); fus(RocketUnitB, 1); }} +subunit sub48() {{ def(E1, $1); fus(SCR1_ALU, 1); fus(SCR1_CFU, 1); }} +subunit sub19() {{ def(E1, $1); fus(SCR1_CFU, 1); }} +subunit sub18() {{ def(E1, $1); fus(SiFive7PipeB, 1); }} +subunit sub26() {{ def(E16, $0); fus(SiFive7PipeB&SiFive7IDiv<15>, 1); }} +subunit sub33() {{ def(E2, $0); fus(RocketUnitFPALU, 1); }} +subunit sub3() {{ def(E2, $0); fus(RocketUnitMem, 1); }} +subunit sub20() {{ def(E2, $0); fus(SCR1_LSU<2>, 1); }} +subunit sub13() {{ def(E2, $0); fus(SiFive7PipeA, 1); }} +subunit sub35() {{ def(E20, $0); fus(RocketUnitFPDivSqrt<20>, 1); 
}} +subunit sub40() {{ def(E25, $0); fus(RocketUnitFPDivSqrt<25>, 1); }} +subunit sub37() {{ def(E27, $0); fus(SiFive7PipeB&SiFive7FDiv<26>, 1); }} +subunit sub43() {{ def(E3, $0); fus(RocketUnitMem, 1); }} +subunit sub52() {{ def(E3, $0); fus(SiFive7PipeA&SiFive7PipeB, 2); }} +subunit sub4() {{ def(E3, $0); fus(SiFive7PipeA, 1); }} +subunit sub50() {{ def(E3, $0); fus(SiFive7PipeAB, 1); fus(SiFive7PipeB, 1); }} +subunit sub1() {{ def(E3, $0); fus(SiFive7PipeAB, 1); }} +subunit sub34() {{ def(E3, $0); fus(SiFive7PipeB, 1); }} +subunit sub47() {{ def(E3, $1); fus(SiFive7PipeAB, 1); fus(SiFive7PipeB, 1); }} +subunit sub25() {{ def(E33, $0); fus(RocketUnitIDiv<33>, 1); }} +subunit sub27() {{ def(E33, $0); fus(SCR1_DIV<33>, 1); }} +subunit sub28() {{ def(E34, $0); fus(RocketUnitIDiv<34>, 1); }} +subunit sub31() {{ def(E4, $0); fus(RocketUnitFPALU, 1); }} +subunit sub44() {{ def(E4, $0); fus(RocketUnitIMul, 1); }} +subunit sub39() {{ def(E5, $0); fus(RocketUnitFPALU, 1); }} +subunit sub32() {{ def(E5, $0); fus(SiFive7PipeB, 1); }} +subunit sub36() {{ def(E56, $0); fus(SiFive7PipeB&SiFive7FDiv<55>, 1); }} +subunit sub29() {{ def(E6, $0); fus(RocketUnitFPALU, 1); }} +subunit sub38() {{ def(E7, $0); fus(RocketUnitFPALU, 1); }} +subunit sub30() {{ def(E7, $0); fus(SiFive7PipeB, 1); }} +subunit sub21() {{ fus(1); fus(Rocket, 0); }} +subunit sub22() {{ fus(1); fus(SiFive7, 0); }} +subunit sub23() {{ fus(1); fus(SyntacoreSCR1, 0); }} +subunit sub53() {{ fus(RocketUnitALU, 1); fus(RocketUnitB, 1); }} +subunit sub9() {{ fus(RocketUnitB, 1); }} +subunit sub14() {{ fus(RocketUnitMem, 1); }} +subunit sub55() {{ fus(SCR1_ALU, 1); fus(SCR1_CFU, 1); }} +subunit sub11() {{ fus(SCR1_CFU, 1); }} +subunit sub16() {{ fus(SCR1_LSU, 1); }} +subunit sub24() {{ fus(SCR1_LSU<2>, 1); }} +subunit sub15() {{ fus(SiFive7PipeA, 1); }} +subunit sub54() {{ fus(SiFive7PipeAB, 1); fus(SiFive7PipeB, 1); }} +subunit sub10() {{ fus(SiFive7PipeB, 1); }} + diff --git a/llvm/docs/Mdl/RFC.md b/llvm/docs/Mdl/RFC.md new file mode 100644 --- /dev/null +++ b/llvm/docs/Mdl/RFC.md @@ -0,0 +1,46 @@ + +## MDL: A Micro-Architecture Description Language for LLVM + +November 2022 Reid Tatge [tatge@google.com](mailto:tatge@google.com) + + +#### **TL;DR:** + +We’ve created a DSL and compiler for modeling micro-architecture that handles a very broad class of architectures - CPU, GPUs, VLIWs, DSPs, ML accelerators, and embedded devices. This effort grew out of a need to quickly develop and experiment with high-quality compilers and tools to facilitate rapid architecture exploration. We named the DSL “MDL” for “Microarchitecture Description Language”. + +While being significantly more expressive than TableGen’s Schedules and Itineraries used in LLVM, MDL is also more concise, and simpler to read and write while supporting a much broader class of embedded and accelerator architectures. We currently can automatically _generate _MDL descriptions for all upstream targets which are in many cases 1/10 the size of the equivalent TableGen descriptions. We’ve integrated this with LLVM, and are sending out this RFC because we believe it could be valuable to the larger LLVM community. \ + + +The MDL compiler, associated tools, and documentation are available as open source (at https://github.com/MPACT-ORG/llvm-project/tree/work), and we would like to explore adding this to the LLVM project, and encourage contributions from others. 
+ + +#### **Background** + +Over the last few years, we have been using LLVM to develop a compiler backend for Google’s TPU machine learning accelerators. TPUs have complex microarchitectures and pose a number of challenges that are not seen in in typical LLVM targets: + + + +* Clustered VLIW with partitioned register files. +* Extremely deep pipelines with complex hazard conditions +* Instructions with functional-unit-specific and/or cluster-specific behaviors + * Non-trivial and/or instance-specific latencies + * Complex resource usage + * Functional-unit-specific register constraints +* Shared/allocated encoding resources (instructions need 1..M of N resources) +* Explicitly managed hardware resources (register ports, internal datapaths, busses, etc) + +While some of these problems manifest in a few upstream targets, this collection of problems is a superset of the problems directly addressed by LLVM - Schedules and Itineraries are simply not sufficient to model everything. Supporting this class of architecture is therefore code-intensive - it takes around 20,000 lines of C++ code to model the TPU sub-targets. This is brittle, hard to write, debug, test, and evolve over time. In contrast, the MDL description for these sub-targets is ~2,000 lines of text. + + +#### **Status** + + + +* We’ve created the MDL language and compiler for describing microarchitecture details, a methodology for integrating it with TableGen files for any target, and a set of APIs that can be used in a machine-independent way to inform back-end passes such as bundle-packing, instruction scheduling, and register allocation. +* To facilitate integration with LLVM, we built a tool which scrapes architectural information from TableGen files, and produces our MDL language for all upstream targets. +* We’ve modified the CodeGen and MC libraries to (optionally) use our methodology for latency management. + +There is a lot more to do. For example, we plan to enhance existing back-end scheduling passes and register allocation passes to cleanly handle a larger class of embedded and accelerator architectures, based on MDL-generated information. + +We welcome feedback on the language design and associated tools and use model. You can find the MDL design documentation, compiler, and other tools in our github repo in llvm/docs/mdl. + diff --git a/llvm/docs/Mdl/ResourceGroups.md b/llvm/docs/Mdl/ResourceGroups.md new file mode 100644 --- /dev/null +++ b/llvm/docs/Mdl/ResourceGroups.md @@ -0,0 +1,304 @@ + + + +## Modeling Resource Groups + +Reid Tatge tatge@google.com + + +[TOC] + + + +### Introduction + +The MDL language supports the specification and use of “resource groups”, which is a set of related resources that can be allocated like a pool: + + +``` + resource group { a, b, c, d, e, f }; +``` + + +Resource groups have CPU, Cluster, or Functional Unit Template scope, and can be passed as parameters to functional unit, subunit, or latency templates. 
You can pass an entire group to a template as a parameter: + + +``` + subunit yyy(group); // reference the entire group +``` + + +Or you can pass references to a member with a C++ “struct” like syntax: + + +``` + subunit xxx(group.a); // reference a single member of a group +``` + + +When a group is passed to a template, you can allocate a single member of a group: + + +``` + def(E3, group:1); // allocate a single resource from the group +``` + + +Or reference a named item of the group: + + +``` + def(E3, group.d); // use a named member of the group +``` + + +Or reference the entire group: + + +``` + def(E3, group); // use all the resources in a group +``` + + +However, you cannot cleanly reference a subset of a group (or an arbitrary set of resources). + + +### Former interpretation of groups + +Currently, members of a resource group have the scope of the context they are defined in (CPU, Cluster, or Functional Unit Template). Resource groups defined in the same scope may define members with the same name, and these names can shadow other resource names defined in the same scope. So for example, the following is legal: + + +``` + resource fun; +resource group g1 { happy, fun, ball }; // Don't tease this + resource group g2 { programming, is, fun }; +``` + + +In this case, we have defined 9 distinct resources in the same scope (including the group resources): + + +``` + fun, g1, g1.happy, g1.fun, g1.ball, g2, g2.programming, g2.is, g2.fun +``` + + +The previous compiler allowed you to specify group members by name as long as they are unique in the current context, and they don’t shadow other defined resources. In this case, “fun” is defined three times, so any use of those must qualify the reference: + + +``` + func_unit mu_fu fu1(fun, g1.fun, g2.fun); // passes 3 different resources + + +``` + + +Grouped resources with unique names can simply be referenced by their name: + + +``` + subunit yyy(programming, is, happy); +``` + + + +### New model: Arbitrary grouping of resources + +There is a fairly common need to specify different subsets of a set of defined resources. The MDL has a methodology to support aspects of this, but in the general case we didn’t have a direct syntax for making this easy to specify. This is particularly common with itineraries, where each stage specifies a different set of resources which can be used by each stage. For this use case, we’d like to be able to use groups to define subsets of defined resources, for example: + + +``` + resource res1, res2, res3, res4, res5, res6, res7, res8; + resource lows { res1, res2, res3, res4 }; + resource highs { res5, res6, res7, res8 }; + resource odds { res1, res3, res5, res7 }; + resource evens { res2, res4, res6, res8 }; + resource arbitrary { res1, res4, res5 }; +``` + + +In this case, all the group members with the same name refer to the same defined resource (in the current scope). This allows us to use groups to define arbitrary sets of defined resources, rather than defining distinct resources for each member. + +In the “fun” example from the previous section, rather than creating nine distinct resources, we would generate only seven: g1, g2, happy, fun, ball, programming, is - ie, all the “fun” members refer to the same “fun” resource. + +This is a very minor change in the language interpretation, and would obsolete the feature that two resource groups, defined in the same scope, could have members with the same name. 
That feature is of relatively little utility compared with being able to define arbitrary subsets of defined resources. 
+
+
+### Semantic and Syntax Changes
+
+Since this is primarily a change in the interpretation of resource groups, no syntax changes are _required_. However, we would like to introduce a syntax for shortcutting the specification of a resource group as a template parameter. Consider the following example:
+
+
+```
+  resource group1 { res1, res2, res3 };
+  resource group2 { res3, res4, res5 };
+  resource group3 { res5, res6 };
+  subunit xyzzy(group1, group2, group3);
+```
+
+
+With the new syntax, this defines 3 resource groups and (only) 6 resources (res1..res6).
+
+We introduce a syntax that allows you to define these groups implicitly as part of the instance, so that the explicit group definitions are unnecessary. We’ll also add syntax to set the default allocation for a resource group - either “one of” or “all of”.
+
+
+```
+  subunit xyzzy(res1|res2|res3, res3|res4|res5, res5|res6);
+  subunit plugh(res1&res2&res3, res3&res4&res5, res5&res6);
+```
+
+
+Normally defined groups can also be written with this syntax. Note that all the “operators” (‘,’ ‘&’, and ‘|’) must be identical in a single definition:
+
+
+```
+  resource group1 { res1 | res2 | res3 };
+  resource group2 { res3 & res4 & res5 };
+  resource group3 { res5, res6 };      // equivalent to |
+```
+
+
+When a group declared with “&” is “used” without an explicit allocation (a la x.y), all of its members are used. When a group declared with “,” or “|” is used, only 1 is allocated (a la x.1). We now have a syntax x.\* which allocates all of a group’s members, regardless of how it is declared.
+
+Implicitly declared groups can be used/declared in functional unit instances and subunit instances only. They cannot be used in latency instances (ie, in subunit templates), since resources can only be declared in CPUs, clusters, and functional unit templates. We may add this capability in the future.
+
+As with the current syntax, note that defined group members are promoted to the scope that the group is defined in, so there’s no need to explicitly define the members of the group as normally defined resources. This change would formalize that promotion.
+
+There are a few minor aspects of this new capability that we need to error check. A resource group definition can have shared bits (“resource x:3”), and/or a phase specification (“resource(E1) x”), and we assume all items in the resource group have the same definition. If we allow a group to reference already defined resources, we _may_ want to ensure all the resources are the same as the group resource definition (which might be an implicit definition…). Or not - there may be some value in allowing different members of a resource group to have different phases, for example.
+
+
+### General Design
+
+An important part of this design change is that for descriptions that don’t have groups with identically named members, the behavior doesn’t change, and this change should be transparent. (None of the existing descriptions have this issue.)
+
+In general, this design simplifies the compiler design of resources quite a bit. It complicates the bundle packing code a bit, since we must provide an explicit list of resource ids to allocate. We may want to handle reference groups and reference arrays the same.
+
+
+#### Parser Changes
+
+We will modify the parser to recognize implicitly defined groups in template instance parameters, and create groups for each of those occurrences. 
+ + +``` + subunit xxx(res1 | res2 | res3, res3 & res4); +``` + + +produces (internally): + + +``` +resource anon1 { res1 | res2 | res3 }; +resource anon2 { res3 & res4 }; +subunit xxx(anon1, anon2); +``` + + +which in turn produces (internally): + + `resource res1, res2, res3, res4;` + + +``` + resource anon1 { res1 | res2 | res3 }; + resource anon2 { res3 & res4 }; + subunit xxx(anon1, anon2); +``` + + +We maintain a table of groups so that we share definitions across explicit and implicit definitions. + + +#### Promoting Members + +In the front-end of the compiler, we preprocess resource group definitions in CPUs, Clusters, and Functional Unit Templates to promote members to the scope they are defined in. While doing this promotion, if the resource already exists, we want to ensure that any phase or shared-bits attribute are the same. As we promote the members of a group, we create a vector of ResourceDef’s for the group definition to link each member to their promoted, defined resources. Each member contains an index into that list of ResourceDefs. + + +#### Name Lookup + +In general, member name lookup is easier. For unqualified references (like “member”), we can eliminate the separate member-name lookups, since the member would have been promoted to a top-level reference. For qualified members (like “group.member”) the code can remain the way it is. We could also simplify it to simply reduce to a pointer to the promoted resource. + + +#### Resource Id Assignment + +We no longer need to assign resource ids to either a group or to its declared members. A group is now simply a set of defined resources, and their associated ids. + + +#### Accessing Member Ids + +Currently, a member’s id is the sum of its group id and its index in the group. In the new approach, a member’s index in the group is used to index into the group’s vector of ResourceDefs, and we use that resource’s id. + + +#### Writing out Member Id Name Definitions + +When we write out definitions for resources, we no longer need to write out ids for resource groups, or their members. We can simply skip them. + + +#### Building Resource Sets + +When we create permutations of pooled resource assignments, we must use a set of resource ids, rather than a simple range. We should do this the same for arrays and groups. \ + + + +#### Output of the Database For Resource Groups and Arrays + +Rather than simply write out an initial resource id and a number of resources, for groups we need to write out a vector of the resource ids in the group. We may want to create a table of these, since there will be many duplicates. We will probably want to use the same mechanism for both Arrays and Groups, so that these can be treated the same way in the database and the bundle packer - even though an Array is guaranteed to have consecutive ids. + +We modify the PooledResourceRef definition - rather than provide a base resource id for the pool, we instead provide an array of resources associated with the pool. For example, today a PooledResourceRef looks like this: + + +``` + static std::vector PRES_101 + {{RefUse,1,0,nullptr,47,2,&POOL_11}}; +``` + + +Currently, we only provide the base id of the pool/group, in this example 47. 
To implement the new methodology, we instead provide a pointer to an array of ids associated with the pool: + + +``` + static ResourceId MEMBERS_47 { 23, 43, 39, 35 }; + static std::vector PRES_101 + {{RefUse,1,0,nullptr,&MEMBERS_47,2,&POOL_11}}; +``` + + + +#### Bundle Packing + +As in the database, a “Pool” is no longer a base plus a number of members. It is now a vector of explicit resource ids as part of the PooledResourceRef object. Rather than “compute” the resource id’s in a pool, we just use the explicitly enumerated resource ids. (This is a one or two line change in the pool allocation code.) + + +#### TdScan Changes + +Currently each stage of an itinerary can specify a set of resources to use in that stage, specifying either all of the resources or just one. Functional unit templates and subunit templates are defined to have a resource template argument for each stage. For each CPU, for each functional unit, TdScan generates a separate instance of the functional unit for each permutation of the stage resources. For example, given the set of InstrStages: + + +``` +InstrStage: cycles=1, units=[ADD1, ADD2], timeinc=-1 +InstrStage: cycles=1, units=[UNIT1, UNIT2], timeinc=-1 +InstrStage: cycles=1, units=[STORE1, STORE2], timeinc=-1 +``` + + +We previously generated the following functional unit definitions: + + +``` + func_unit type name(ADD1, UNIT1, STORE1); + func_unit type name(ADD2, UNIT1, STORE1); + func_unit type name(ADD1, UNIT2, STORE1); + func_unit type name(ADD2, UNIT2, STORE1); + func_unit type name(ADD1, UNIT1, STORE2); + func_unit type name(ADD2, UNIT1, STORE2); + func_unit type name(ADD1, UNIT2, STORE2); + func_unit type name(ADD2, UNIT2, STORE2); +``` + + +This is all the permutations of the resource sets associated with the three stages. With the new syntax, we generate the following: + + `func_unit type name(ADD1|ADD2, UNIT1|UNIT2, STORE1|STORE2);` + +and let the MDL compiler create the allocation pools to implement the permutations automatically. + diff --git a/llvm/docs/Mdl/UsingTheMDLCompiler.md b/llvm/docs/Mdl/UsingTheMDLCompiler.md new file mode 100644 --- /dev/null +++ b/llvm/docs/Mdl/UsingTheMDLCompiler.md @@ -0,0 +1,575 @@ + + +## Using TdScan and the MDL compiler + +Reid Tatge tatge@google.com + + +[TOC] + + + +#### **Overview of the process** + +This document describes the steps to building the MDL compiler and Tablegen scraper (tdscan) so that you can create and debug MDL instruction descriptions for LLVM. + +The “normal” process of using an MDL machine description for a target is to write the overall architecture description by hand, and generate an instruction description by scraping information from the tablegen description of the target. The generated instruction description is explicitly imported by the MDL compiler to tie the hand-written architecture description to the instruction descriptions in the target’s tablegen files. + +To keep the architecture in sync with the LLVM description, we extract and scrape the tablegen information as part of the compiler build process. The extraction process uses tablegen to write out all the target information, and the scraper scans this file and produces an MDL-based description of instructions, operands, registers, and register classes. This is imported by the architecture description so that the two descriptions are compiled together. This produces .cc and .h files that can be included in the LLVM build. 
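+
+As a rough preview of what those generated files provide, the sketch below shows how backend code might look up the generated information for one subtarget. This is a minimal, hypothetical example: it assumes a target family named “CPU”, uses the CpuTable/CpuInfo interfaces described later in this document (declared in llvm/include/llvm/MC/MDLInfo.h), and the namespace and commented-out accessors are illustrative rather than a definitive API.
+
+
+```
+// Hedged sketch: querying the MDL-generated database for a single subtarget.
+// Assumes the generated CPUGenMdlInfo.inc/CPUGenInfo.h files are linked in,
+// and that a CpuTable instance for the family is available to target code.
+#include <string>
+#include "llvm/MC/MDLInfo.h"   // declares the mdl CpuTable / CpuInfo types
+
+void queryMdlDatabase(llvm::mdl::CpuTable &Table, const std::string &Subtarget) {
+  // cpu() returns the allocated and initialized CpuInfo for this subtarget.
+  llvm::mdl::CpuInfo *Info = Table.cpu(Subtarget);
+  if (!Info)
+    return;   // hypothetical check: no MDL information for this subtarget
+  // CpuInfo carries the per-subtarget sizing values (resource counts, number
+  // of pipeline phases, maximum parallel issue, ...) that a scheduler can use
+  // to size its reservation tables; the accessor names are illustrative, so
+  // they are left as comments here.
+  // int MaxIssue     = Info->max_issue();
+  // int NumResources = Info->num_resources();
+}
+```
+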
+ + + +#### **Scraping Information from Tablegen** + +To synchronize the MDL for a target with LLVM, we need to extract all of the instruction, operand and register definitions from the tablegen description. The first step in this process is to get tablegen to dump its internal representation of the target description to a plain text file. + + +##### **Create tablegen information for a target:** + +This step uses the normal tablegen program to produce a dump of all the tablegen information for any LLVM target. + + + +* export LLVM=<path to llvm> +* export TARGET=<family-name> \ +Where family-name is one of AArch64, AMDGPU, ARC, ARM, AVR, BPF, CSKY, Hexagon, Lanai, M68k, Mips, MSP430, NVPTX, PPC, RISCV, Sparc, SystemZ, VE, WebAssembly, X86, XCore +* .../clang-tblgen -print-records \ + -I $LLVM/llvm-project/llvm/include/ \ + -I $LLVM/llvm/include/llvm/IR/ \ + -I $LLVM/llvm-project/llvm/lib/Target/$TARGET/ \ + $LLVM/llvm-project/llvm/lib/Target/$TARGET/$(TARGET).td > ~/$(TARGET).txt + +This creates the file <family\_name>.txt, which can be processed by “tdscan” to produce an MDL file that describes the ISA of the processor family. + + +##### **Scraping the tablegen file to produce ISA information** + +In this step we use “tdscan” to process the tablegen output file, which produces an MDL language description of the target architecture. \ + + + + +* export TARGET=<family-name> + + Where family-name is one of: AArch64, AMDGPU, ARC, ARM, AVR, BPF, CSKY, Hexagon, Lanai, LoongArch, M68k, Mips, MSP430, NVPTX, PPC, RISCV, Sparc, SystemZ, VE, WebAssembly, X86, XCore + +* `…/tdscan -–family_name=$TARGET $(TARGET).txt` + + \ +This produces the file $(TARGET)\_instructions.mdl, which contains MDL descriptions for all instructions, operands, registers, and register classes defined in the td files for that target. + +Anomalies: + + + +* For Sparc, the family name is actually “SP”, while the file name is “Sparc.txt”. +* For PowerPC, the name of the td file is PPC.td which resides at //third\_party/llvm/llvm-project/llvm/lib/Target/**PowerPC** + +Note: Without the –family\_name argument, tdscan uses the input file name as the target name (minus the filename extension). As noted above, in general the family name matches the input file name. + +**Scraping the tablegen file to produce a full architecture spec** + +If the tablegen description contains Schedules and/or Itinerary descriptions, you can also have tdscan produce an MDL architecture spec for a processor. Currently, this applies to the following targets: AArch64, AMDGPU, ARM, Hexagon, Lanai, MIPS, PPC, RISCV, Sparc (SP), SystemZ, and X86. + + + +* export TARGET=<family-name> + + Where family-name is one of: AArch64, AMDGPU, ARM, Hexagon, Lanai, Mips, PPC, RISCV, Sparc, SystemZ, X86 (same family name caveat for Sparc) + +* `…/tdscan --gen_arch_spec -–family_name=$TARGET $(TARGET).txt` + +This will produce both the instructions file ($(TARGET)\_instructions.mdl) and the architecture spec file ($(TARGET).mdl). The generated architecture spec will explicitly import the instruction description file. Compiling $(TARGET).mdl with the MDL compiler will produce an instruction database for the processor family. + + +#### **Compiling a Machine Description** + +Generally, we separate the instruction descriptions from the architecture spec into separate .mdl files, and the architecture spec explicitly imports the instruction descriptions. 
So to compile a full machine description, we invoke the compiler on the architecture spec:
+
+
+```
+.../mdl CPU.mdl
+```
+
+
+This will create three files: CPUGenMdlInfo.inc, CPUGenMdlTarget.inc, and CPUGenInfo.h, which contain the database of architecture and instruction information that is imported into LLVM.
+
+
+##### **Command line options**
+
+You can invoke the compiler with “--help” to get a brief description of the command line options. The following options are supported, and discussed in more detail below:
+
+
+```
+  --check_all_operands (Check references to all operands - not just registers);
+      default: false;
+  --check_usage (Check subunit, reference, and resource usage);
+      default: false;
+
+  --dump_fus (Dump functional unit instantiations); default: false;
+  --dump_instr (Dump instruction information); default: false;
+  --dump_llvm_defs (Dump LLVM definitions); default: false;
+  --dump_preds (Dump user-defined predicates); default: false;
+  --dump_resources (Dump resource ids); default: false;
+  --dump_spec (Dump entire mdl specification); default: false;
+  --dump_sus (Dump subunit instantiations); default: false;
+
+  --fatal_warnings (Treat warnings as errors); default: false;
+  --import_dir (import file dir); default: "";
+  --output_dir (output file dir); default: "";
+  --warnings (Print warnings); default: true;
+```
+
+
+
+##### **Options that help debug a machine description under development**
+
+
+###### **--check\_usage:**
+
+This option checks for possible errors in the description:
+
+
+
+* It checks that every register operand is explicitly referenced in the latency rules that apply to that instruction.
+* It warns for any latency template reference (use, def, etc) that never appears to apply to any instruction.
+* It warns for any unused subunit template (never referred to in any instruction).
+* It warns for any resource that is never referenced anywhere.
+
+These are not errors, but could indicate that something is incorrectly modeled.
+
+
+###### **--check\_all\_operands:**
+
+This option does the same checks that --check\_usage performs, but also checks that every single operand - even non-register operands - is always referenced. This is also not an error check, but simply a diagnostic tool.
+
+
+###### **--dump\_instr:**
+
+This option dumps comprehensive information (to std::out) about every behavior of every instruction on every subtarget.
+
+**NOTE:** There are a LOT of instruction descriptions - each instruction has entries for the cross product of each processor, functional unit, and issue slot it can run on. You'll notice that the entries are often almost identical except for where they run. Internally, identical aspects of the description are shared - across different instructions, functional units, and processors - so this isn't as bad as it might seem. If you look through the resource references, you should see EXACTLY what each instruction does in each context it can run in. A few thoughts:
+
+
+
+* If you ignore functional unit and issue slot resources, many of the instances of an instruction are going to be identical (from the perspective of a simulator, for example).
+* There are quite a few instructions that have different operand and resource latencies based on which functional unit they run on. So the only difference between their descriptions will be a single latency (operand or resource). The good news is that the representation of all of this is pretty compact.
+* All of this information is encoded in the output file (<family>.mdl.cc). 
+
+So there is a massive amount of information here - not to worry: the compiler deduplicates everything, so there is very little redundancy in the generated database. This is just the “raw” information the compiler generates internally.
+
+Here's what the output of --dump\_instr looks like:
+
+
+```
+Instruction: MOV16rm(GR16 dst, i16mem src)
+  flat(GR16 dst, (i16mem.ptr_rc) (src.0), (i16mem.i8imm) (src.1),
+       (i16mem.ptr_rc_nosp) (src.2), (i16mem.i32imm) (src.3),
+       (i16mem.SEGMENT_REG) (src.4)) {
+    subunit(sub579,sub1976,sub1977,sub1978,sub1979,sub1980,sub1981,sub1982,sub1983,
+            sub1984,sub2767,sub2768,sub2769,sub2770); }
+  Subunit: AlderlakeP.U11
+    Operand references:
+        ===> def.p(E6, GR16:$dst[0])
+    Resources:
+        use.p(F1,U11{12})
+    Pool Resources:
+    Architectural Register Constraints:
+
+
+Instruction: MOV16rm(GR16 dst, i16mem src)
+  flat(GR16 dst, (i16mem.ptr_rc) (src.0), (i16mem.i8imm) (src.1),
+       (i16mem.ptr_rc_nosp) (src.2), (i16mem.i32imm) (src.3),
+       (i16mem.SEGMENT_REG) (src.4)) {
+    subunit(sub579,sub1976,sub1977,sub1978,sub1979,sub1980,sub1981,sub1982,sub1983,
+            sub1984,sub2767,sub2768,sub2769,sub2770); }
+  Subunit: Znver1.U0
+    Operand references:
+        ===> def.p(E5, GR16:$dst[0])
+        ===> use.p(E5, i16mem:$src.0[1])
+    Resources:
+        use.p(F1,U0{1})
+    Pool Resources:
+    Architectural Register Constraints:
+```
+
+
+**_How to Interpret MDL Debug Output_**
+
+Each instruction record describes a single behavior of an instruction on a particular processor and functional unit. For each instruction, we write out:
+
+
+
+* The instruction name (per LLVM) and the operand types/names as declared in llvm. Some of these operands are composites of other operands.
+* The "flat" operand list: each composite operand is expanded to its components as discrete operands. This is the "real" operand list.
+* The "Subunit": the processor and functional unit names for this instance of the instruction.
+* All of this instruction's operand references, and the name of the pipeline phase they happen in (E1, etc). This includes operand-related resource references, if any.
+* All of this instruction's resource references, and when they happen.
+* All of this instruction's pooled resource references, and when they happen.
+* Any architectural register constraints imposed on the instruction by this functional unit (most CPUs don't have these).
+
+**Operands:**
+
+The operand references have the syntax (in the output):
+
+    `<opcode> (<protection>)? '(' <pipeline_phase> ',' <operand_specification> ',' (<resource_references>)? ')'`
+
+where the opcodes are "use", "def", “predicate”. <Protection> is what kind of pipeline protection is used for this reference (protected, unprotected, hard), one of “.p”, “.u”, or “.h”.
+
+**Resources:**
+
+The resource references are the same, without the operand reference component:
+
+    `<opcode> (<protection>)? '(' <pipeline_phase> ',' <resource_references> ')'`
+
+An operand specification has the syntax:
+
+
+```
+  <operand_type> ':' '$' <operand_name> '[' <flat_operand_index> ']'
+```
+
+
+An example: GPR:$x[2] refers to operand number 2 (in the flat operand list), called "x", which has operand type GPR.
+
+The resource references have the syntax:
+
+
+```
+  <resource_name> '{' <resource_id> '}'
+```
+
+
+An example: alu1{2} refers to a resource "alu1" which has a resource id of 2. 
+
+**Pooled Resources:**
+
+Pooled resources have a slightly more complex syntax:
+
+
+```
+  <resource_name> '{' <resource_id> '}' '[' <member_range> ']'
+      (':' <attribute>)* '-->' <operand_index>
+```
+
+
+An example: imm{26}[0..3]:size:bits-->2 refers to the "imm" resource, resource id 26, a subrange of members 0..3 with "size" and "bits" attributes, associated with operand 2.
+
+Pooled resources also include “subpool id” and “size request” information.
+
+
+###### **--dump\_resources:**
+
+Write descriptions of all defined resources to std::out.
+
+For each subtarget, we print a set of resource definitions, followed by a list of _pooled_ resource definitions (if the description includes any resource pools).
+
+**_Example Resource Dump:_**
+
+
+```
+Resources defined for 'RISCV' ---------------------------------------
+fake.RISCV.end : 1
+
+Pooled resources defined for 'RISCV' --------------------------------
+
+Resources defined for 'Rocket' ---------------------------------------
+Funcunit.Rocket.__.U0 : 1, cycles: [0..0]
+Funcunit.Rocket.__.U1 : 2, cycles: [0..0]
+Funcunit.Rocket.__.U2 : 3, cycles: [0..0]
+Funcunit.Rocket.__.U3 : 4, cycles: [0..0]
+Funcunit.Rocket.__.U4 : 5, cycles: [0..0]
+Funcunit.Rocket.__.U5 : 6, cycles: [0..0]
+Funcunit.Rocket.__.U6 : 7, cycles: [0..0]
+Funcunit.Rocket.__._default_ : 8, cycles: [0..0]
+Issue.Rocket.__.s0 : 9
+fake.Rocket.end : 10
+
+Pooled resources defined for 'Rocket' --------------------------------
+
+Resources defined for 'SiFive7' ---------------------------------------
+Funcunit.SiFive7.__.U0 : 1, cycles: [0..0]
+Funcunit.SiFive7.__.U1 : 2, cycles: [0..0]
+Funcunit.SiFive7.__.U1 : 3, cycles: [0..0]
+Funcunit.SiFive7.__.U1 : 4, cycles: [0..0]
+Funcunit.SiFive7.__._default_ : 5, cycles: [0..0]
+Issue.SiFive7.__.s0 : 6
+Issue.SiFive7.__.s1 : 7
+fake.SiFive7.end : 8
+
+Pooled resources defined for 'SiFive7' --------------------------------
+```
+
+
+**Interpreting Resource Dumps**
+
+For each subtarget, we print a set of resource definitions, and a second list of _pooled_ resource definitions.
+
+**Resource definitions:**
+
+In the resource definition list, each line describes a single resource or pooled resource on a single processor, including its fully-qualified name. There are three classes of resources reflected in their names: functional units, issue slots, and every explicitly defined resource. Each line has the syntax:
+
+
+```
+  <resource_name> ':' <resource_ids>
+      'cycles:' '[' <earliest> '..' <latest> ']' '<' <reference_types> '>'
+```
+
+
+The resource name reflects the type and context of how a resource is defined:
+
+
+
+* “Funcunit” describes the instance of a functional unit (in a CPU definition).
+* “Issue” is a resource defined as an issue slot.
+* “Resource” is any other explicitly defined resource.
+* After the initial identifier, the rest of the name indicates the context of the definition: the CPU, the optional cluster name, the optional functional unit name, followed by the actual defined resource name.
+
+Following its name, we list its resource id (or resource ids if it's a pool), its earliest and latest reference cycles, and the type of references seen. 
+ +A few examples: + + + A functional unit, id=2, "used" in cycle 0: + + +``` + Funcunit.Rocket.U1 : 2, cycles: [0..0] +``` + + + + An issue slot, id=6 (no explicit references) + + +``` + Issue.SiFive7.s0 : 6 +``` + + + + A resource pool, ids 38-40, "used" in cycle 0: + + +``` + Resource.CPU.pool : [38..40], cycles: [0..0] +``` + + + + A resource pool, ids 41-42, earliest Hold/Reserve cycle 2, latest Hold/Reserve cycle 11: + + +``` + Resource.CPU.my_pool : [41..42], cycles: [2..11] +``` + + + + A single resource, id=56, earliest cycle=2, latest cycle=9: + + +``` + Resource.CPU.my_res : 56, cycles: [2..9] +``` + + +**Pooled resource definitions:** + +The resource pool descriptions describe each allocation subpool. Subpools are automatically created by the MDL compiler for every resource pool (all of which are listed in the resource definitions output). + +The order of subpools is important. For each resource, the compiler allocates a subpool for every allocation request of a particular subrange of resources, and for a particular number of resources. The subpools (for each pool) are then ordered such that the most restricted subranges are allocated first, and for each subrange the biggest allocations are done first. + +By parsing all allocations into these subpools, we can use a greedy algorithm and produce the best possible allocation. + +**Note:** Pool allocation is actually moderately limited in its ability to deal with allocation across pipeline phases. In general, you want resource pools that are allocated to be defined with a specific pipeline phase (ie: resource(F1) res[5];). The MDL compiler doesn’t check this (although it could, and probably should). + + +###### **–fatal\_warnings:** + +Treat warning messages as fatal errors. + + +###### **–warnings, –nowarnings:** + +Print warnings (by default this is on.)** ** + + +##### **Options that help debug the compiler** + + +###### **–dump\_preds:** + +Dumps all predicate definitions scraped form LLVM to std::out (in a format similar to the way they appear in tablegen files.) + + +###### **–dump\_spec:** + +Dumps the internal representation of the entire MDL specification to std::out. \ + + + +###### **–dump\_sus:** + +Dump every instantiated subunit to std::out. This shows exactly which resources are passed down to the subunit, and subsequently to the latency template and all the associated latency rules (defs, uses, etc). + + +#### **MDL Compiler Output** + +The MDL compiler generates three C++ files (two .inc files and one .h file) that encapsulate the information derived from the input machine description file. Generally, the names of MDL input files are of the form _.mdl_, where __ is the llvm target name. The output file names are correspondingly _GenMdlInfo.inc, <family>GenMdlTarget.inc,_ and _GenMdlInfo.h_. + +The types of these objects used in the database files are declared in + +llvm-project/llvm/include/llvm/MC/MDLInstrInfo.h + + + llvm-project/llvm/include/llvm/MC/MDLInfo.h + + +##### **Generated header file: <family>MDLGenMdlInfo.h** + +This generated file contains a few constant definitions that describe overall parameters of the architecture family, and constants that associate defined resources with ids. All of these definitions are wrapped in the “llvm” namespace. + + +###### Global Maximums + +There are a few constants defined that apply to all CPUs (subtargets) defined in a single machine description. 
These are the overall maximum values versus the values that are defined on a per-subtarget basis (in the generated C++ CpuInfo object, described later). The following constants are defined: + + + **kMaxResourceId:** + + + The maximum resource id allocated over all subtargets. + + + **kMaxUsedResourceId:** + + + The maximum resource id that appears in “use” statements. These are generally defined for pipeline hazards, functional units, and issue slots, If the compiler uses a dense set representation for resources associated with these things, this constant indicates the size needed to represent them all. + + + **kMaxPipePhase:** + + + The latest pipe phase in which resources are “used”. Paired with kMaxUsedResourceId, this constant can be used to implement dense sets for used resources. + + + **kMaxIssue:** + + + Indicates the maximum number of instructions that can be issued in parallel. This is primarily used for VLIW architectures, but applies to any architecture that can do parallel issue of instructions (any superscalar processor) + + + **kMaxPools:** + + + Number of resource pools managed by the compiler + + + **kMaxPoolCount** + + + Maximum number of resources allocated out of any pool. + +There’s honestly not much use for these, since all of these values are stored in the CpuInfo object associated with each subtarget. + + +###### Resource Definitions + +Every resource defined in a machine description is given a distinct C++ constant int definition that allows compiler code to access its value. Each defined resource is wrapped in a namespace for the family, CPU, and (optional) cluster that it is defined in. Structured resource members are additionally wrapped in the enclosing resource definition. Globally defined resources are inherited by all CPU’s in the family. Using this methodology, each defined resource has a unique name in its namespace hierarchy. + +We don’t currently provide names for resources defined in functional units - primarily because there is no current utility for providing access to them from C++. + +This is best illustrated with an example: + + +``` + family FAM; + + resource global_res; + + cpu CPU { + resource cpu_res; + func_unit alu cpu_alu(); + cluster CLU { + resource cluster_res; + resource struct { f1, f2 }; + issue slot0, slot1; + func_unit alu alu0(); + func_unit alu alu1(); + } + } +``` + + +This generates the following definitions in the output header file: + + +``` + namespace llvm { + namespace FAM { + namespace CPU { + const int global_res = 1; // FAM::CPU::global_res + const int cpu_res = 2; // FAM::CPU::cpu_res + const int cpu_alu = 3; // FAM::CPU::cpu_alu (func unit) + namespace CLU { + const int slot0 = 4; // FAM::CPU::CLU::slot0 (issue slot) + const int slot1 = 5; // FAM::CPU::CLU::slot1 (issue slot) + const int alu0 = 6; // FAM::CPU::CLU::alu0 (func unit) + const int alu1 = 7; // FAM::CPU::CLU::alu1 (func unit) + const int cluster_res = 8; // FAM::CPU::CLU::cluster_res + namespace struct { + const int f1 = 9; // FAM::CPU::CLU::struct::f1 + const int f2 = 10; // FAM::CPU::CLU::struct::f2 + } + } + } + } + } +``` + + + +##### **Generated C++ file: <family>MdlInstrInfo.inc** + +The generated C++ file contains initializations of the database that contains all of the architecture information for all defined subtargets. These objects are all defined in “llvm\_instruction\_info.h”. + +The top level object is a CpuTable, that contains a dictionary which maps llvm subtarget names to CpuInfo objects. 
CpuTable has a single method: + + CpuInfo \*cpu(std::string subtarget) \ +Which returns a pointer to the allocated and initialized CpuInfo object for the specified subtarget. A compiler can use as many subtargets as needed, although typically only one is selected, allocated, and initialized. + +Each CpuInfo object contains the following information for a single subtarget: + + + +* The number of resources +* The number of “used” resources +* The number of pipeline phases that use resources +* The maximum number of instructions that can issue in parallel +* The number of resource pools that must be allocated for instructions +* The earliest pipeline phase of operand uses +* A function point to initialize the subtargets subunit table. +* A memoized pointer to the initialized subunit table. + +The first 6 integers are subtarget-specific values related to the globally maximal values defined in the generated header file (described earlier), and are (or can be) used to size various objects used in scheduling. The subunit table is automatically allocated and initialized and contains information about each instruction’s behaviors for the selected subtarget. + +The subunit table contains, for each instruction, a set of instruction behaviors. Each behavior is described as a single Subunit, which contains pointers to: + + + +* A set of operand references +* A set of resource uses +* A set of resource holds +* A set of resource reservations +* A set of pooled resource references +* A set of context-specific register constraints + +All of these sets are shared between subunits, and subunits are shared between instructions with identical behaviors. + + + + diff --git a/llvm/docs/Mdl/VirtualPredicateFunctions.md b/llvm/docs/Mdl/VirtualPredicateFunctions.md new file mode 100644 --- /dev/null +++ b/llvm/docs/Mdl/VirtualPredicateFunctions.md @@ -0,0 +1,278 @@ + + +## Virtual Predicate Functions + +Reid Tatge tatge@google.com + + +[TOC] + + + +### Background + +In LLVM, latencies of instructions are modeled by associating Read- or Write-resources with instruction operands. Briefly, the “latency” of an operand is represented by the “latency” of the resource. Resources are primarily associated with “output” operands - operands which write results to registers, but can also be associated with “input” operands. + +To support variable latencies on reads and writes, Tablegen uses ReadVariant and WriteVariant records which each associate a set of explicitly predicated resources with a single read or write resource. At compile time, the predicates are evaluated to determine which predicated resource is to be used for a particular read or write. + +LLVM has a predicate language which can be used to check the number, type, and contents of an instruction’s operands. The language also includes the ability to call a C++ function to decide whether the predicate applies to a particular instruction instance. + +LLVM has two distinct instruction internal representations (IRs) that the predicates can be applied to: MachineInstr and MCInst, which are packaged in separate libraries. MCInst is a low-level instruction representation, suitable for writing assemblers and object-code analysers. MachineInstr is the instruction representation used in the LLVM code generator, and is a much higher-level representation. Predicates can apply to either representation, with the caveat that since MachineInstr carries more semantic information, predicates operating on that can do more detailed tests in some cases. 
+
+While most of the predicate language applies equally to either representation (such as “number of operands”, or “type of operand”), you can specify a call to MCInst- or MachineInstr-specific functions. Therefore, a predicate function can reference different functions in either (or both) libraries.
+
+This isn't a problem when both the Target and MC libraries are included in an application, which is the typical use case. However, a few applications include _only_ MC, which leads to link problems when a predicate function references functions in the Target library.
+
+The TableGen solution produces two separate schedule-class resolution functions for MCInst and MachineInstr, and they inline all the specialized predicate functions. These two functions are quite large (in some cases over 4000 LOC), and are mostly logically identical functions, which only do different things when a predicate specifies a representation-specific function call. All the predicate evaluation is folded into these two functions (resolveScheduleClass() for MachineInstr, and resolveVariantSchedClassImpl() for MCInst).
+
+In the MDL infrastructure, each distinct predicate is placed in a separate C++ function, and called from representation-independent functions that handle both instruction representations.
+
+This creates a problem in the uncommon situation where the Target library isn't included in an application, but a Target function was called in a MachineInstr predicate function, leading to undefined function references at link time. In the MDL-generated code, it's not exactly feasible to refactor the higher-level functions, since they are called from representation-independent tables, and/or from functions referenced in the representation-independent database. Note that this is only a problem for the ARM and AArch64 processor families.
+
+To clarify the problem:
+
+
+
+* TableGen generates two functions, which look roughly like this (many details omitted; in these examples the strings <MIpred_n(MI)> and <MCpred_n(MI)> represent the inlined body of a predicate function, not necessarily a function call):
+
+
+```
+unsigned resolveScheduleClass(unsigned SchedClass, MachineInstr *MI) {
+  switch (SchedClass) {
+  case 1:
+    if (<MIpred_n(MI)>) return 1000;
+    else if (<MIpred_n(MI)>) return 1001;
+    break;
+  case 2:
+    if (<MIpred_n(MI)>) return 1002;
+    else if (<MIpred_n(MI)>) return 1003;
+    …
+    break;
+  …
+  case 400:
+    if (<MIpred_n(MI)>) return 4321;
+    if (<MIpred_n(MI)>) return 4322;
+    break;
+  }
+  return 0;
+}
+
+unsigned resolveVariantSchedClassImpl(unsigned SchedClass, MCInst *MI) {
+  switch (SchedClass) {
+  case 1:
+    if (<MCpred_n(MI)>) return 1000;
+    else if (<MCpred_n(MI)>) return 1001;
+    break;
+  case 2:
+    if (<MCpred_n(MI)>) return 1002;
+    else if (<MCpred_n(MI)>) return 1003;
+    …
+    break;
+  …
+  case 400:
+    if (<MCpred_n(MI)>) return 4321;
+    if (<MCpred_n(MI)>) return 4322;
+    break;
+  }
+  return 0;
+}
+```
+
+
+    The first function (resolveScheduleClass) is called by Target library functions, and operates on MachineInstr objects. The second function is called by MC library functions, and operates solely on MCInst objects.
+
+
+    Note that if a particular schedule class doesn't have an appropriate MachineInstr or MCInst predicate, that case is simply not implemented in the switch statements for the function, and the function returns a value of 0 (ie invalid). TBH, this is kind of odd. 
+ + + +* In contrast, the MDL compiler currently produces this: + + + + `bool Pred1(Instr *MI) {` + + +``` + if (MI->is_MC()) return (MI); + if (MI->is_MI()) return (MI); + return false; +} +bool Pred2(Instr *MI) { + … +} +… +bool Pred123(Instr *MI) { … } +bool Pred333(Instr *MI) { … } +``` + + + + In the MDL infrastructure, Instr objects have separate constructors for MCInst and MachineInstr objects, so that the client functions (including predicate functions) can support both representations. + + + The problem is that the MI\_Pred() functions are (for ARM and AArch64) sometimes defined in the Target library, so that library must be included to avoid link errors. + + +### Solutions + +Typically, there are only a handful of predicate functions (for either representation). We could do a few things to hack this solution: + + + +1. Always include the Target library if the MC library is included. This seems heavy handed from an LLVM perspective. Its easy but won’t be accepted by the community. It only affects a handful of LLVM tools, mostly tools for manipulating object code. +2. Move the function definitions to a separate file, and include the file associated with the linked libraries. There’s not an automated way to do this. +3. Automatically generate fake stubs for each of the referenced MachineInst functions, and include the stubs when the Target library isn’t included. If we could guarantee the order of library inclusion, perhaps this approach could work. +4. Related to approach 3: Automatically generate “weak-attributed” empty stubs of each referenced MachineInst predicate function and include them in the MCInst library. This is great but there doesn’t appear to be a standard way to create weak references. +5. Simulate virtual functions: generate two tables - one for MCInst functions and one for MachineInstr functions, and use common indexes to reference the appropriate table. Add the table definitions to the appropriate library, and add them to the CpuTable object separately (in the constructors.) You can then access the tables via the CpuTable object, which is accessible anywhere. + +Method 5 is the most involved, and for most targets isn’t even necessary (and the tables will be empty). But its also completely transparent to the build process. + +I’d prefer to use weak function references, if I can figure out how to generate them portably. + +In the absence of weak function support (option 4), we could implement approach 3 if we could guarantee that the library containing MachineInstr is _always _included before MCInst. + +Update: can’t find a reasonable way to do weak references portably, so that option is out. Option 3 doesn’t really work, since … make. So option 5 is the only viable option, and thats whats implemented. Implementation described below. + + +##### **Simulating virtual functions** + +The Target library explicitly includes the MC library. So we only need to virtualize MachineInstr predicate functions. We’ll add a pointer to the Target’s predicate table to CpuTableDef. + +In this method, we would (optionally, as needed) create an vector of function pointers in the CPU table to contain pointers to MachineInstr predicate functions. 
+
+The generated table would look roughly like this:
+
+```
+  std::vector<bool (*)(const Instr *)> InstrPredicates { MI_Pred1, …, MI_Predn };
+```
+
+In the CpuTable object, we would include a pointer to this array:
+
+```
+  std::vector<bool (*)(const Instr *)> *instr_predicates;
+```
+
+The MDL compiler would generate an auto-initialization of this array in a file that can be included by the Subtarget module, and the Subtarget constructor would register the generated definition with the CpuTable object.
+
+So, for example, if we currently have a predicate function that looks like this:
+
+```
+  static bool PRED_3(const Instr *MI) {
+    return ((static_cast<const ARMBaseInstrInfo *>(MI->tii())
+                 ->getNumLDMAddresses(*MI->mi()) + 1) / 2 == 1);
+  }
+```
+
+We would get this:
+
+```
+  // generated in an inc file included by Subtarget
+  static bool MI_Pred_3(const Instr *MI) {
+    return ((static_cast<const ARMBaseInstrInfo *>(MI->tii())
+                 ->getNumLDMAddresses(*MI->mi()) + 1) / 2 == 1);
+  }
+  …
+  std::vector<bool (*)(const Instr *)> InstrPredicates { MI_Pred_1, …, MI_Pred_3, …, MI_Pred_n };
+
+  // generated in the MDL output inc file
+  static bool PRED_3(const Instr *MI) {
+    return MI->isMI() && MI->evaluate_predicate(3);
+  }
+```
+
+Here's a more complex example that includes both MCInst and MachineInstr predicates:
+
+```
+  static bool PRED_36(const Instr *MI) {
+    return ((MI->isMC() ? ARM_MC::isCPSRDefined(*MI->mc(), MI->mcii())
+                        : static_cast<const ARMBaseInstrInfo *>(MI->tii())
+                              ->ARMBaseInstrInfo::isCPSRDefined(*MI->mi())) &&
+            (MI->isMC()
+                 ? ARM_MC::isPredicated(*MI->mc(), MI->mcii())
+                 : static_cast<const ARMBaseInstrInfo *>(MI->tii())->isPredicated(
+                       *MI->mi())));
+  }
+```
+
+We would get this:
+
+```
+  // generated in an inc file included by Subtarget
+  static bool MI_Pred_5(const Instr *MI) {
+    return static_cast<const ARMBaseInstrInfo *>(MI->tii())
+        ->ARMBaseInstrInfo::isCPSRDefined(*MI->mi());
+  }
+  static bool MI_Pred_6(const Instr *MI) {
+    return static_cast<const ARMBaseInstrInfo *>(MI->tii())->isPredicated(
+        *MI->mi());
+  }
+
+  std::vector<bool (*)(const Instr *)> InstrPredicates { MI_Pred_1, …, MI_Pred_n };
+
+  // generated in the MDL output inc file
+  static bool PRED_36(const Instr *MI) {
+    return ((MI->isMC()
+                 ? ARM_MC::isCPSRDefined(*MI->mc(), MI->mcii())
+                 : MI->evaluate_predicate(5)) &&
+            (MI->isMC()
+                 ? ARM_MC::isPredicated(*MI->mc(), MI->mcii())
+                 : MI->evaluate_predicate(6)));
+  }
+```
+
+
+##### Library-based solution
+
+In this solution, we create stubs in the MCInst library for the MachineInstr functions that are referenced in predicate functions. These stubs are put in a separate translation unit and added to the MC library. For this approach to work, the library containing the real MachineInstr definitions must ALWAYS be linked first, so that its predicate functions are found whenever that library is included.
+
+Since we can't really enforce the order of linking, this probably isn't viable.
+
+
+##### Side Note
+
+As mentioned, only two targets specify predicates with Target library functions: ARM and AArch64. And only a handful of tools include MC but not Target (note that Target explicitly includes MC). A rough list of those tools/utilities:
+
+* Objcopy
+* DwarfLinker
+* Interface Stub
+* llvm-mca
+* llvm-mc
+* llvm-nm
+* llvm-ml
+* llvm-libtool-darwin
+* sancov
+* llvm-cfi-verify
+* llvm-objdump
+* llvm-jitlink
+* llvm-profgen
+* llvm-rtdyld
+* llvm-dwarfdump
+* llvm-ar
+
+So, unfortunately, this is a small problem for a small number of architectures and a small number of tools. A virtual function table is arguably overkill for such a small problem; it would be nice if we could just special-case the two affected architectures.
+ +But the good news is that there are so few instances of this, the cost incurred by the virtualizing of the functions is insignificant. + diff --git a/llvm/include/llvm/CodeGen/DFAPacketizer.h b/llvm/include/llvm/CodeGen/DFAPacketizer.h --- a/llvm/include/llvm/CodeGen/DFAPacketizer.h +++ b/llvm/include/llvm/CodeGen/DFAPacketizer.h @@ -25,9 +25,11 @@ #ifndef LLVM_CODEGEN_DFAPACKETIZER_H #define LLVM_CODEGEN_DFAPACKETIZER_H +#include "llvm/CodeGen/MDLHazardRecognizer.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Support/Automaton.h" #include #include @@ -148,6 +150,9 @@ // Map: MI -> SU. std::map MIToSUnit; + // MDL-based packetizer (embedded in the schedule hazard recognizer) + ScheduleHazardRecognizer *HazardRec; + public: // The AAResults parameter can be nullptr. VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, @@ -164,10 +169,28 @@ // Return the ResourceTracker. DFAPacketizer *getResourceTracker() {return ResourceTracker;} + bool canReserveResources(MachineInstr &MI) { + if (HazardRec) + return HazardRec->canReserveResources(MI); + return ResourceTracker->canReserveResources(MI); + } + void reserveResources(MachineInstr &MI) { + if (HazardRec) + HazardRec->reserveResources(MI); + else + ResourceTracker->reserveResources(MI); + } + void clearResources() { + if (HazardRec) + HazardRec->clearResources(); + else + ResourceTracker->clearResources(); + } + // addToPacket - Add MI to the current packet. virtual MachineBasicBlock::iterator addToPacket(MachineInstr &MI) { CurrentPacketMIs.push_back(&MI); - ResourceTracker->reserveResources(MI); + reserveResources(MI); return MI; } diff --git a/llvm/include/llvm/CodeGen/MDLHazardRecognizer.h b/llvm/include/llvm/CodeGen/MDLHazardRecognizer.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MDLHazardRecognizer.h @@ -0,0 +1,193 @@ +//=- llvm/CodeGen/MDLHazardRecognizer.h - MDL Scheduling Support -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the MDLHazardRecognizer class, which implements +// hazard-avoidance heuristics for scheduling using the MDL infrastructure. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MDLHAZARDRECOGNIZER_H +#define LLVM_CODEGEN_MDLHAZARDRECOGNIZER_H + +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MDLInfo.h" +#include + +namespace llvm { + +using namespace mdl; + +class MachineInstr; +class SUnit; + +/// MDLHazardRecognizer - This uses the MDL to determine if an instruction +/// can be issued in the current cycle. It uses both the current issue packet, +/// and resources reserved for instructions issued in other cycles. 
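+///
+/// A minimal, hypothetical usage sketch (not taken from an in-tree pass),
+/// assuming STI is a TargetSubtargetInfo* built with an MDL model:
+///
+///   MDLHazardRecognizer HR(STI);
+///   if (HR.getHazardType(SU) == ScheduleHazardRecognizer::NoHazard)
+///     HR.EmitInstruction(SU);   // add SU to the current packet
+///   HR.AdvanceCycle();          // commit the packet and advance reservations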
+class MDLHazardRecognizer : public ScheduleHazardRecognizer { +protected: + const TargetSubtargetInfo *STI; + CpuInfo *Cpu; + Reservations *ResSet; + SlotSet Packet; + const char *DebugType; + +public: + MDLHazardRecognizer(const TargetSubtargetInfo *STI, + const char *ParentDebugType = "") + : STI(STI), Cpu(STI->getCpuInfo()), ResSet(Cpu->allocReservations()), + DebugType(ParentDebugType) { + Packet.reserve(Cpu->getMaxIssue()); + } + ~MDLHazardRecognizer() { delete ResSet; } + + bool atIssueLimit() const override { + return Packet.size() == Cpu->getMaxIssue(); + } + unsigned IssueSize() const override { return Packet.size(); } + SlotSet *getPacket() override { return &Packet; } + + HazardType getHazardType(SUnit *SU, int Stalls = 0) override { + if (Stalls == 0) + return canReserveResources(*SU->getInstr()) ? NoHazard : Hazard; + + auto Res = std::unique_ptr(ResSet->clone()); + SlotDesc Cand(SU->getInstr(), STI); + + for (; Stalls < 0; Stalls++) + Res->recede(); + for (; Stalls > 0; Stalls--) + Res->advance(); + return Cpu->canAddToBundle(Packet, Cand, *Res.get()) ? NoHazard : Hazard; + } + + void Reset() override { + Packet.clear(); + ResSet->reset(); + } + + void EmitInstruction(SUnit *SU) override { + SlotDesc Candidate(SU->getInstr(), STI); + Cpu->addToBundle(Packet, Candidate, *ResSet); + } + + void EmitInstruction(MachineInstr *MI) override { + SlotDesc Candidate(MI, STI); + Cpu->addToBundle(Packet, Candidate, *ResSet); + } + + void AdvanceCycle() override { + Cpu->addBundleToReservation(Packet, *ResSet); + ResSet->advance(); + Packet.clear(); + } + + void RecedeCycle() override { + Cpu->addBundleToReservation(Packet, *ResSet); + ResSet->recede(); + Packet.clear(); + } + + // These functions replace Packetizer methods. + + void clearResources() override { ResSet->reset(); } + + bool canReserveResources(MachineInstr &MI) override { + SlotDesc Candidate(&MI, STI); + return Cpu->canAddToBundle(Packet, Candidate, *ResSet); + } + void reserveResources(MachineInstr &MI) override { + SlotDesc Candidate(&MI, STI); + Cpu->addToBundle(Packet, Candidate, *ResSet); + } +}; + +/// MDLModuloHazardRecognizer - This uses the MDL to determine if an instruction +/// can be issued in the current cycle. It uses both the current issue packet, +/// and resources reserved for instructions issued in other cycles. +class MDLModuloHazardRecognizer : public ScheduleHazardRecognizer { +protected: + const TargetSubtargetInfo *STI; + int II = 0; + CpuInfo *Cpu; + Reservations *ResSet; + SlotSet *Packets; // Slotsets for every cycle of the loop. + int Cycle = 0; + const char *DebugType; + +public: + MDLModuloHazardRecognizer(const TargetSubtargetInfo *STI, int II, + const char *ParentDebugType = "") + : STI(STI), II(II), Cpu(STI->getCpuInfo()), + ResSet(Cpu->allocModuloReservations(II)), Packets(new SlotSet[II]), + DebugType(ParentDebugType) { + for (int ii = 0; ii < II; ii++) + Packets[ii].reserve(Cpu->getMaxIssue()); + } + ~MDLModuloHazardRecognizer() { + delete ResSet; + delete[] Packets; + } + + bool atIssueLimit() const override { + return Packets[Cycle].size() == Cpu->getMaxIssue(); + } + + void setCycle(unsigned II) { + Cycle = II; + ResSet->setCycle(Cycle); + } + + // TODO: This function may be obsolete. + HazardType getHazardType(SUnit *SU, int Stalls = 0) override { + if (Stalls == 0) + return canReserveResources(*SU->getInstr()) ? NoHazard : Hazard; + + SlotDesc Cand(SU->getInstr(), STI); + setCycle(Cycle + Stalls); + auto Haz = + Cpu->canAddToBundle(Packets[Cycle], Cand, *ResSet) ? 
NoHazard : Hazard; + setCycle(Cycle); + return Haz; + } + + // TODO: This function may be obsolete. + void Reset() override { + Packets[Cycle].clear(); + ResSet->reset(); + } + + // TODO: This function may be obsolete. + void EmitInstruction(SUnit *SU) override { + SlotDesc Candidate(SU->getInstr(), STI); + Cpu->addToBundle(Packets[Cycle], Candidate, *ResSet); + } + + // TODO: This function may be obsolete. + void EmitInstruction(MachineInstr *MI) override { + SlotDesc Candidate(MI, STI); + Cpu->addToBundle(Packets[Cycle], Candidate, *ResSet); + } + + // These functions replace Packetizer methods. + void clearResources() override { ResSet->reset(); } + + bool canReserveResources(MachineInstr &MI) override { + SlotDesc Candidate(&MI, STI); + return Cpu->canAddToBundle(Packets[Cycle], Candidate, *ResSet); + } + void reserveResources(MachineInstr &MI) override { + SlotDesc Candidate(&MI, STI); + Cpu->addToBundle(Packets[Cycle], Candidate, *ResSet); + } +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_MDLHAZARDRECOGNIZER_H diff --git a/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h b/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h --- a/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h +++ b/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h @@ -17,6 +17,8 @@ #define LLVM_CODEGEN_RESOURCEPRIORITYQUEUE_H #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/MC/MDLInfo.h" namespace llvm { class DFAPacketizer; @@ -65,6 +67,12 @@ /// definition of DFA by a target. std::unique_ptr ResourcesModel; + /// Hazard model from parent. + ScheduleHazardRecognizer *HazardRec; + + /// MDL database pointer (if used) + mdl::CpuInfo *Cpu; + /// Resource model - packet/bundle model. Purely /// internal at the time. std::vector Packet; @@ -74,7 +82,8 @@ int HorizontalVerticalBalance; public: - ResourcePriorityQueue(SelectionDAGISel *IS); + ResourcePriorityQueue(SelectionDAGISel *IS, + ScheduleHazardRecognizer *HazardRec); bool isBottomUp() const override { return false; } @@ -122,8 +131,9 @@ void scheduledNode(SUnit *SU) override; bool isResourceAvailable(SUnit *SU); void reserveResources(SUnit *SU); + void reset(); -private: + private: void adjustPriorityOfUnscheduledPreds(SUnit *SU); SUnit *getSingleUnscheduledPred(SUnit *SU); unsigned numberRCValPredInSU (SUnit *SU, unsigned RCId); diff --git a/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h --- a/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h +++ b/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h @@ -14,6 +14,8 @@ #ifndef LLVM_CODEGEN_SCHEDULEHAZARDRECOGNIZER_H #define LLVM_CODEGEN_SCHEDULEHAZARDRECOGNIZER_H +#include "llvm/MC/MDLInfo.h" + namespace llvm { class MachineInstr; @@ -50,6 +52,9 @@ /// FIXME: remove this once MachineScheduler is the only client. virtual bool atIssueLimit() const { return false; } + virtual unsigned IssueSize() const { return 0; } + virtual mdl::SlotSet *getPacket() { return nullptr; } + /// getHazardType - Return the hazard type of emitting this node. There are /// three possible results. Either: /// * NoHazard: it is legal to issue this instruction on this cycle. @@ -122,6 +127,14 @@ for (unsigned i = 0; i < Quantity; ++i) EmitNoop(); } + + /// These methods replicate what DFAPacketizer methods do. + /// canReserveResources - Check that an instruction can be issued. + virtual bool canReserveResources(MachineInstr &MI) { return true; } + /// reserveResources - Check that an instruction can be issued. 
+ virtual void reserveResources(MachineInstr &MI) {} + /// clearResources - Clear all resource tracking. + virtual void clearResources() {} }; } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/TargetSchedule.h b/llvm/include/llvm/CodeGen/TargetSchedule.h --- a/llvm/include/llvm/CodeGen/TargetSchedule.h +++ b/llvm/include/llvm/CodeGen/TargetSchedule.h @@ -20,6 +20,8 @@ #include "llvm/Config/llvm-config.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" +#include "llvm/MC/MDLInfo.h" +#include namespace llvm { @@ -85,17 +87,29 @@ return nullptr; } + /// Return true if this target uses MDL for modeling instruction behaviors. + bool hasMdlModel() const; + mdl::CpuInfo *getCpuInfo() const; + /// Return true if this machine model includes an instruction-level /// scheduling model or cycle-to-cycle itinerary data. bool hasInstrSchedModelOrItineraries() const { return hasInstrSchedModel() || hasInstrItineraries(); } + + bool hasAnySchedModel() const { + return hasInstrSchedModel() || hasInstrItineraries() || hasMdlModel(); + } bool enableIntervals() const; /// Identify the processor corresponding to the current subtarget. unsigned getProcessorID() const { return SchedModel.getProcessorID(); } /// Maximum number of micro-ops that may be scheduled per cycle. - unsigned getIssueWidth() const { return SchedModel.IssueWidth; } + unsigned getIssueWidth() const { + if (hasMdlModel()) + return STI->getCpuInfo()->getMaxIssue(); + return SchedModel.IssueWidth; + } /// Return true if new group must begin. bool mustBeginGroup(const MachineInstr *MI, @@ -110,6 +124,8 @@ /// Get the number of kinds of resources for this target. unsigned getNumProcResourceKinds() const { + if (hasMdlModel()) + return STI->getCpuInfo()->getMaxFuncUnitId() + 1; return SchedModel.getNumProcResourceKinds(); } @@ -119,7 +135,9 @@ } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - const char *getResourceName(unsigned PIdx) const { + std::string getResourceName(unsigned PIdx) const { + if (hasMdlModel()) + return (std::string) "U" + std::to_string(PIdx); if (!PIdx) return "MOps"; return SchedModel.getProcResource(PIdx)->Name; @@ -140,6 +158,7 @@ /// Multiply the number of units consumed for a resource by this factor /// to normalize it relative to other resources. + /// The MDL passes in a pool size (or 1), rather than a resource id. unsigned getResourceFactor(unsigned ResIdx) const { return ResourceFactors[ResIdx]; } @@ -157,11 +176,17 @@ } /// Number of micro-ops that may be buffered for OOO execution. - unsigned getMicroOpBufferSize() const { return SchedModel.MicroOpBufferSize; } + unsigned getMicroOpBufferSize() const { + if (hasMdlModel()) + return STI->getCpuInfo()->getReorderBufferSize(); + return SchedModel.MicroOpBufferSize; + } /// Number of resource units that may be buffered for OOO execution. /// \return The buffer size in resource units or -1 for unlimited. 
int getResourceBufferSize(unsigned PIdx) const { + if (hasMdlModel()) + return 1; // currently unused return SchedModel.getProcResource(PIdx)->BufferSize; } diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -20,6 +20,7 @@ #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MDLInfo.h" #include "llvm/Support/CodeGen.h" #include #include @@ -67,7 +68,8 @@ const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, - const unsigned *OC, const unsigned *FP); + const unsigned *OC, const unsigned *FP, + const mdl::CpuTableDef *MDL); public: // AntiDepBreakMode - Type of anti-dependence breaking that should diff --git a/llvm/include/llvm/CodeGen/VLIWMachineScheduler.h b/llvm/include/llvm/CodeGen/VLIWMachineScheduler.h --- a/llvm/include/llvm/CodeGen/VLIWMachineScheduler.h +++ b/llvm/include/llvm/CodeGen/VLIWMachineScheduler.h @@ -14,7 +14,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/MC/MDLInfo.h" #include #include #include @@ -36,6 +38,7 @@ /// Not limited to VLIW targets per se, but assumes definition of resource /// model by a target. DFAPacketizer *ResourcesModel; + ScheduleHazardRecognizer *HazardRec; const TargetSchedModel *SchedModel; @@ -46,8 +49,12 @@ /// Total packets created. unsigned TotalPackets = 0; + // Information for using MDL-based bundling. + mdl::CpuInfo *Cpu; // MDL-based CPU descriptor (or null) + public: - VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM); + VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM, + ScheduleHazardRecognizer *HazardRec); VLIWResourceModel &operator=(const VLIWResourceModel &other) = delete; VLIWResourceModel(const VLIWResourceModel &other) = delete; virtual ~VLIWResourceModel(); @@ -58,8 +65,19 @@ virtual bool isResourceAvailable(SUnit *SU, bool IsTop); virtual bool reserveResources(SUnit *SU, bool IsTop); unsigned getTotalPackets() const { return TotalPackets; } - size_t getPacketInstCount() const { return Packet.size(); } - bool isInPacket(SUnit *SU) const { return is_contained(Packet, SU); } + + size_t getPacketInstCount() const { + return Cpu ? HazardRec->IssueSize() : Packet.size(); + } + bool isInPacket(SUnit *SU) const { + if (Cpu) { + for (auto &slot : *HazardRec->getPacket()) + if (slot.getMI() == SU->getInstr()) + return true; + return false; + } + return is_contained(Packet, SU); + } protected: virtual DFAPacketizer *createPacketizer(const TargetSubtargetInfo &STI) const; @@ -242,7 +260,8 @@ protected: virtual VLIWResourceModel * createVLIWResourceModel(const TargetSubtargetInfo &STI, - const TargetSchedModel *SchedModel) const; + const TargetSchedModel *SchedModel, + ScheduleHazardRecognizer *HazardRec) const; SUnit *pickNodeBidrectional(bool &IsTopNode); diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake --- a/llvm/include/llvm/Config/config.h.cmake +++ b/llvm/include/llvm/Config/config.h.cmake @@ -16,6 +16,9 @@ /* Define to 1 to enable crash overrides, and to 0 otherwise. 
*/ #cmakedefine01 ENABLE_CRASH_OVERRIDES +/* Define to 1 to enable usage of MDL infrastructure, and to 0 otherwise. */ +#cmakedefine01 ENABLE_MDL_USE + /* Define to 1 to enable crash memory dumps, and to 0 otherwise. */ #cmakedefine01 LLVM_ENABLE_CRASH_DUMPS diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h --- a/llvm/include/llvm/MC/MCSubtargetInfo.h +++ b/llvm/include/llvm/MC/MCSubtargetInfo.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" +#include "llvm/MC/MDLInfo.h" #include "llvm/TargetParser/SubtargetFeature.h" #include "llvm/TargetParser/Triple.h" #include @@ -92,6 +93,10 @@ FeatureBitset FeatureBits; // Feature bits for current CPU + FS std::string FeatureString; // Feature string + // Machine Description based machine model + const mdl::CpuTableDef *CpuTable = nullptr; + mdl::CpuInfo *CpuModel = nullptr; + public: MCSubtargetInfo(const MCSubtargetInfo &) = default; MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef TuneCPU, @@ -99,7 +104,8 @@ ArrayRef PD, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, - const unsigned *OC, const unsigned *FP); + const unsigned *OC, const unsigned *FP, + const mdl::CpuTableDef *MDL); MCSubtargetInfo() = delete; MCSubtargetInfo &operator=(const MCSubtargetInfo &) = delete; MCSubtargetInfo &operator=(MCSubtargetInfo &&) = delete; @@ -162,6 +168,10 @@ /// Get the machine model for this subtarget's CPU. const MCSchedModel &getSchedModel() const { return *CPUSchedModel; } + const mdl::CpuTableDef *getCpuTable() const { return CpuTable; } + mdl::CpuInfo *getCpuInfo() const { return CpuModel; } + bool hasMdlModel() const { return CpuModel; } + /// Return an iterator at the first process resource consumed by the given /// scheduling class. const MCWriteProcResEntry *getWriteProcResBegin( diff --git a/llvm/include/llvm/MC/MDLBundle.h b/llvm/include/llvm/MC/MDLBundle.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/MC/MDLBundle.h @@ -0,0 +1,470 @@ +//===- MDLBundle.h MDL-based instruction bundling implementation --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// MDL-based Bundle Packer. This file defines the APIs and implementation +// for an MDL-based parallel-issue instruction bundler, appropriate for both +// VLIW processors and OOO, superscalar processors. It can bundle MCInsts or +// MachineInstrs. The general approach is to incrementally build up bundles +// one instruction at a time, adding the highest priority instructions first. +// This is a non-ejecting bundler: when adding a new instruction to a bundle, +// we don't eject instructions in the bundle to allow a new instruction fit, +// although we do reconsider all functional unit and resource assignments each +// time we add an instruction. +// +// A bundle is self-contained - it contains all the functional unit and +// resource assignments for the instructions in the bundle. +// +// We also define a resource reservation table which can be used to track +// scheduled instructions' resource uses over time, and it can be used as an +// additional external constraint to the bundle packer. 
+// +// The "heavy lifting" of this is performed in templatized member functions +// which are specialized for each subtarget, so that most of the objects +// can be statically sized and allocated. +// +//===----------------------------------------------------------------------===// + +#ifndef MDL_BUNDLE_H_ +#define MDL_BUNDLE_H_ + +#include "llvm/MC/MDLInfo.h" +#include "llvm/Support/FormatVariadic.h" + +namespace llvm { +namespace mdl { + +/// Given a set of LLVM instructions, create a set of bundling candidates. +inline SlotSet createCandidateSet(const InstrSet &Insts, + const TargetSubtargetInfo *STI) { + SlotSet Candidates; + Candidates.reserve(Insts.size()); + for (unsigned i = 0; i < Insts.size(); i++) + Candidates.emplace_back(Insts[i], STI); + return Candidates; +} + +/// Given a set of LLVM instructions, create a set of bundling candidates. +inline SlotSet createCandidateSet(const MCInstrSet &Insts, + const MCSubtargetInfo *STI, + const MCInstrInfo *MCII) { + SlotSet Candidates; + Candidates.reserve(Insts.size()); + for (unsigned i = 0; i < Insts.size(); i++) + Candidates.emplace_back(Insts[i], STI, MCII); + return Candidates; +} + +/// Given a set of instructions ordered by priority, attempt to bundle as +/// many of them as possible, and return the set of bundled instructions. +/// Note that this method doesn't consider external resource conflicts. +template +SlotSet CpuConfig::bundleCandidates(const SlotSet *Candidates) { + SlotSet Bundle; // Start with an empty bundle. + Bundle.reserve(CpuParams::MaxIssue); + + // One at a time, attempt to add instuctions to a bundle. + ReservationsConfig Res; // Start with an empty reservation table + for (auto &Cand : *Candidates) + addToBundle(Bundle, Cand, Res); + + return Bundle; +} + +/// Determine if an instruction can be added to a bundle. Return true if +/// it can be, else return false. Note: we don't modify the SlotSet! +/// This requires all the work of adding to a bundle without actually adding +/// to the bundle. Normally you should just call AddToBundle directly. +template +bool CpuConfig::canAddToBundle(SlotSet &Bundle, + const SlotDesc &Candidate, + const Reservations &Res) { + SlotSet LocalBundle = Bundle; // We don't want to modify the bundle. + return addToBundle(LocalBundle, Candidate, Res); +} + +/// Try to add an instruction to a bundle: +/// - First check its resources against already-scheduled instructions. +/// - Make sure it can allocate resources for any pool requests. +/// Return true if we successfully added the instruction. +template +bool CpuConfig::addToBundle(SlotSet &Bundle, + const SlotDesc &Candidate, + const Reservations &Res) { + Bundle.push_back(Candidate); + if (Candidate.getSubunits() == nullptr) + return true; + + // Starting with the existing bundle and its subunit assignments, recursively + // find a set of subunit assignments and resource allocations that + // accomodates the new instruction. + auto LocalRes = static_cast &>(Res); + if (attemptToBundle(Bundle, LocalRes, 0, false) == BundleStatus::kSuccess) + return true; + + Bundle.pop_back(); + return false; +} + +// Given a "final" bundle, add it to the resource reservation table. 
+template +void CpuConfig::addBundleToReservation(SlotSet &Bundle, + Reservations &Res) { + auto &LocalRes = static_cast &>(Res); + for (auto &Cand : Bundle) + if (Cand.getSubunits()) + addResources(Cand, (*Cand.getSubunits())[Cand.getSubunitId()], LocalRes); +} + +/// Given a proposed bundle, find a permutation of the candidate instructions' +/// subunits that don't have interfering resource usage. +/// If some of the instructions have already been bundled (they have an +/// existing subunit id), use that set of subunits as a starting point. +/// If any of assignments need to be reconsidered, reset all the remaining +/// unbundled instructions' subunit ids. +template +BundleStatus +CpuConfig::attemptToBundle(SlotSet &Bundle, + ReservationsConfig &Res, + int WhichSlot, bool Reset) { + // If we've found a valid bundle assignment for all the candidates, attempt + // to allocate resources for the entire bundle. + // NOTE: Currently, if the pool allocation fails, the current bundle of + // instructions fails. Backtracking over subunit assignments typically won't + // impact whether the pool allocation succeeds, and its EXTREMELY expensive. + // This is based on the observation that an instruction's pool requests are + // typically identical across all subunit candidates. + if (WhichSlot == (int)Bundle.size()) + return allocatePools(Bundle, Res); + + SlotDesc &Item = Bundle[WhichSlot]; + + // If this is some kind of pseudo-instruction, we don't need to check + // its resources. + if (Item.getSubunits() == nullptr) + return attemptToBundle(Bundle, Res, WhichSlot + 1, Reset); + + // Iterate over this instruction's subunits, and try to find a slot + // allocation that is compatible with earlier bundled instructions. If + // we succeed, recursively try the rest of the slots. + // If this is the first attempt to bundle this instruction, we iterate over + // all of its subunits, otherwise we start with the previous subunit + // assignment. + SubunitVec &subunits = *Item.getSubunits(); + int OriginalId = Item.getSubunitId(); + if (Reset) + Item.setSubunitId(0); + + for (int Id = Item.getSubunitId(); Id < (int)subunits.size(); + Id++, Reset = true) { + auto LocalRes = Res; + if (addResources(Item, subunits[Id], LocalRes)) { + Item.setSubunitId(Id); + auto Status = attemptToBundle(Bundle, LocalRes, WhichSlot + 1, Reset); + // If we succeeded, update the resource set, and return success. + if (Status == BundleStatus::kSuccess) { + Res = LocalRes; + return Status; + } + // If we failed allocation for the bundle, return that we failed. + // Note: if a target has specific instructions which have subunits with + // very different pool allocation requests, we may want to simply + // delete the next statement, and the packing will try much harder to + // find allocations for different combinations of subunits. This is + // VERY expensive, and probably not terribly common. This might happen + // if a set of equivalent candidate functional units have separate pools + // of resources to allocate from. + if (Status == BundleStatus::kAllocationFailed) { + Item.setSubunitId(OriginalId); // Back out of subunit assignments. + return Status; + } + } + } + Item.setSubunitId(OriginalId); // Back out of subunit assignments. + return BundleStatus::kBundleFailed; +} + +/// Add resource reference to current resource set. Return true if there were +/// no conflicts. Note that we don't attempt to share these resources, even +/// if the resource is shareable. 
(Its quite expensive, even if done +/// efficiently) +template +bool CpuConfig::addResources(SlotDesc &Slot, Subunit &WhichSubunit, + ReservationsConfig &res) { + if (auto *Refs = WhichSubunit.getUsedResourceReferences()) { + for (auto const &Ref : ReferenceIter(Refs, Slot.getInst())) { + if (Ref.isUse() && !Ref.isDuplicate() && !Ref.isUnreserved()) + if (res.testSet(Ref.getResourceId(), Ref.getPhase(Slot.getInst()), + Ref.getCycles())) + return false; + } + } + return true; +} + +/// Find statically allocated resource uses and add them to the values set. +template +void CpuConfig::findStaticResources( + SlotSet &Bundle, ResourceValues &Values) { + for (auto &slot : Bundle) + if (slot.getSubunits()) + if (auto *Refs = slot.getSubunit()->getUsedResourceReferences()) + for (auto &Ref : ReferenceIter(Refs, slot.getInst())) + if (Ref.isUse() && Ref.isValidOperandIndex() && Ref.isShared() && + !Ref.isUnreserved()) { + int value = slot.getInst()->getOperand(Ref.getOperandIndex()); + Values.set(Ref.getResourceId(), &value, 1); + } +} + +/// Given a candidate bundle and a proposed set of subunits, attempt to find +/// a resource allocation for all the pool requests. Note that we save any +/// prior allocations, and restore them if this allocation fails. +template +BundleStatus +CpuConfig::allocatePools(SlotSet &Bundle, + ReservationsConfig &Res) { + std::vector SavedResources; + + // Collate all pool requests into lists for each subpool and size combination. + PoolRequests Pools; + for (auto &Slot : Bundle) { + SavedResources.push_back(std::move(Slot.getResources())); + if (Slot.getSubunits()) { + if (auto *Refs = Slot.getSubunit()->getPooledResourceReferences()) + for (auto &Ref : ReferenceIter(Refs, Slot.getInst())) + if (Ref.isUse()) + Pools.AddPoolRequest(&Slot, &Ref); + } + } + + // Find the set of shared resources used by the bundle. + ResourceValues Values; + findStaticResources(Bundle, Values); + + // Allocate each pool independently. If any pool fails, we fail, and + // restore all the old resource allocations. + for (int PoolId = 0; PoolId < CpuParams::PoolCount; PoolId++) + if (!allocatePool(Pools.getPool(PoolId), Res, Values)) { + int Id = 0; + for (auto &Slot : Bundle) + Slot.setResources(std::move(SavedResources[Id++])); + return BundleStatus::kAllocationFailed; + } + return BundleStatus::kSuccess; +} + +/// Given a pool request, fetch operand values from the instruction, and return +/// the normalized values in the "values" array. Return false if we can't +/// share values. +inline bool getOperandValues(PoolRequest &Item, int Count, int Values[]) { + Instr *Inst = Item.getInst(); + int OperandId = Item.getOperandId(); + auto *Pool = Item.getRef()->getPool(); + if (Pool->hasValueFunc() && Inst->isOpndLiteral(OperandId)) + return Pool->getValues(Inst, OperandId, Count, Values); + + // For now, we don't attempt to share virtual register-based resources. + if (Inst->isOpndVirtualRegister(OperandId)) + return false; + + // Handle simple literals and registers. + if (Count == 1) { + Values[0] = Inst->getOperand(OperandId); + return true; + } + return false; +} + +/// Attempt to allocate a set of pool resources for a reference. +/// Check to see if we can share resources with existing resource allocations. +/// Note that for multi-cycle pool allocations, we don't have enough information +/// to share these kinds of resources. 
+template +bool CpuConfig::allocateResource(PoolRequest &Item, int Id, + int Count, + ReservationsConfig &Res, + ResourceValues &Values) { + // Check to see if we can share a resource with another operand. + int OpndValues[CpuParams::MaxPoolAllocation + 1] = {0}; + + int Phase = Item.getPhase(); + int Cycles = Item.getCycles(); + + // If this item has shared bits (width), fetch the operands' normalized + // values from the operand, and check them against currently shared values. + bool Shared = Cycles == 1 && Item.isShared() && + getOperandValues(Item, Count, OpndValues); + if (Shared) { + if (Values.check(Id, OpndValues, Count)) { + for (int Off = 0; Off < Count; Off++) { + int Opnd = Item.getRef()->getOperandIndex(); + Item.getSlot()->getResources().emplace_back(Id + Off, Opnd, + OpndValues[Off], Count); + } + return true; + } + } + + // Non-sharing case - see if all the specified resources are available. + for (int Off = 0; Off < Count; Off++) + if (Res.test(Id + Off, Phase, Cycles)) + return false; + + // If the entire allocation succeeds, mark all the resources as reserved. + // Add the resources used to the slot they're allocated for. + for (int Off = 0; Off < Count; Off++) { + Res.set(Id + Off, Phase, Cycles); + int Opnd = Item.getRef()->getOperandIndex(); + Item.getSlot()->getResources().emplace_back(Id + Off, Opnd, OpndValues[Off], + Count); + } + + // If the item is shared, store off the operand values. + if (Shared) + Values.set(Id, OpndValues, Count); + return true; +} + +/// Given a set of pool requests for a single pool, determine if we can +/// allocate all of them. Each pool request list has the following attributes: +/// - all members of the list are requesting the same subpool. +/// - all requests are for the same (non-zero) number of resources. +/// - this set of pool requests is less constrained than previous pool +/// request sets, and more constrained than future pool request sets. +template +bool CpuConfig::allocatePool(PoolRequestSet &Pool, + ReservationsConfig &Res, + ResourceValues &Values) { + if (Pool.empty()) + return true; + + ResourceIdType *Base = Pool[0].getResourceIds(); // ptr to resource id pool + int Count = Pool[0].getCount(); // # of resources requested + int First = Pool[0].getFirst(); // offset from first resource id + int Last = Pool[0].getLast() - Count + 1; // offset from last resource id + + // For multi-resource allocations, make sure first and last are reasonable. + if (Count != 1) { + if (Last % Count != 0) + Last -= Last % Count; + if (First % Count != 0) + First += Count - (First % Count); + } + + // Choose an allocation order for all requests for this pool (or subpool). + // If the request is for any member of a pool, order doesn't matter. + // If the request is for the head of the pool, allocate in forward order. + // If the request is for the tail of the pool, allocate in backwards order. + if (First == 0) { + for (auto &Item : Pool) { + bool Allocated = false; + for (int id = First; id <= Last; id += Count) + if ((Allocated = allocateResource(Item, Base[id], Count, Res, Values))) + break; + if (!Allocated) + return false; + } + } else { + for (auto &Item : Pool) { + bool Allocated = false; + for (int Id = Last; Id >= First; Id -= Count) + if ((Allocated = allocateResource(Item, Base[Id], Count, Res, Values))) + break; + if (!Allocated) + return false; + } + } + + return true; +} + +// Write out all the instructions in a bundle. 
+inline void CpuInfo::dumpBundle(std::string Cpu, std::string Msg, + SlotSet &Bundle) { + std::cout << "\n" + << Msg << " " << Cpu << " " << Bundle.size() + << " -----------------------------------------\n"; + for (auto &Slot : Bundle) + std::cout << dumpSlot(" ", Slot); +} + +/// Write out a single bundled instruction, and what resources it uses. +inline std::string CpuInfo::dumpSlot(std::string Msg, SlotDesc &Slot) { + std::string Out = Msg; + int Id = Slot.getInst()->getOpcode(); + auto Name = Slot.getInst()->getName(); + Out += formatv("su: {0}/{1} {2} : {3}", Slot.getSubunitId(), + Slot.getSubunits()->size(), Id, Name); + + if (!Slot.getSubunits()) + return Out + "\n"; + + if (auto *Refs = Slot.getSubunit()->getUsedResourceReferences()) { + Out += "\t <"; + for (auto &ref : ReferenceIter(Refs, Slot.getInst())) + Out += formatv("{0},", ref.getResourceId()); + Out += ">"; + } + + if (!Slot.getResources().empty()) { + Out += "\t ["; + for (auto [id, opnd, value, count] : Slot.getResources()) + Out += formatv("{{{0},{1},{2}},", id, opnd, value); + Out += "]"; + } + + return Out + "\n"; +} + +// Validate that a bundle doesn't oversubscribe resources. +template +bool CpuConfig::validateBundle(std::string Cpu, SlotSet &Bundle) { + std::map Resources[CpuParams::MaxUsedResourceId + 1]; + + // First check subunit static resources. + for (auto &Slot : Bundle) { + if (!Slot.getSubunits()) + continue; + if (auto *Refs = Slot.getSubunit()->getUsedResourceReferences()) { + for (auto &Ref : ReferenceIter(Refs, Slot.getInst())) { + if (Ref.isFus()) + continue; + int Phase = Ref.getPhase(Slot.getInst()); + int Id = Ref.getResourceId(); + if (Ref.isUnreserved()) + continue; // Skip unreserved resources. + if (Ref.isValidOperandIndex() && Ref.isShared()) { + int OpndValue = Slot.getInst()->getOperand(Ref.getOperandIndex()); + if (!Resources[Phase].emplace(Id, OpndValue).second) { + if (Resources[Phase][Id] != OpndValue) { + dumpBundle(Cpu, + formatv("Failed:{0},{1},{2}", Id, OpndValue, Phase), + Bundle); + return false; + } + } + } + } + } + + // Check allocated resources (all are in the same pipe phase). + for (auto [Id, OpndId, Value, Count] : Slot.getResources()) + if (!Resources[0].emplace(Id, Value).second) { + if (Resources[0][Id] != Value) { + dumpBundle(Cpu, formatv("Failed:{0},{1} ", Id, Value), Bundle); + return false; + } + } + } + return true; +} + +} // namespace mdl +} // namespace llvm + +#endif // MDL_BUNDLE_H_ diff --git a/llvm/include/llvm/MC/MDLInfo.h b/llvm/include/llvm/MC/MDLInfo.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/MC/MDLInfo.h @@ -0,0 +1,1359 @@ +//===- MDLInfo.h - MDL-based instructions modeling ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions that describe the generated machine +// description database. These definitions must stay in sync with what the +// mdl compiler produces. The overall schema of the database looks like this: +// +// The top-level object for each CPU family is the CpuTable, which is a +// dictionary of subtarget descriptors. Each entry in the dictionary is a +// subtarget name and a CpuInfo object that describes a single subtarget. 
+// +// A CpuInfo object captures a basic set of architectural parameters, and +// includes a pointer to the CPU's subunit table and optional forwarding +// information table. +// +// Each subunit table contains a pointer to a vector of valid subunits for +// each instruction valid on that CPU. +// +// Each subunit object is a vector of tuples. Each tuple represents one +// possible behavior of an instruction (or a set of instructions), including +// all of its operand references, its resource requirements, its pooled +// resource requirements, and any additional operand constraints to apply +// to the instruction. Each subunit object therefore contains all feasible +// behaviors of a client instruction. +// +// The four subunit components are described in separate tables, and heavily +// shared across subunits and CPUs. +// +//===----------------------------------------------------------------------===// + +#ifndef MDL_INFO_H +#define MDL_INFO_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "llvm/IR/Constants.h" +#include "llvm/Support/MathExtras.h" + +namespace llvm { + +// Declarations of LLVM types that describe targets and instructions. +class MachineInstr; +class TargetSubtargetInfo; +class TargetInstrInfo; +class MCInst; +class MCSubtargetInfo; +struct MCSchedModel; +class MCInstrInfo; +class TargetSchedModel; + +namespace mdl { + +// Fundamental type of a reference to an operand or resource. +// These are powers of two so that we can quickly check for subsets of them. +struct ReferenceTypes { + using Item = int16_t; + static constexpr Item RefNull = 0; + static constexpr Item RefPred = 1; // use of a predicate operand + static constexpr Item RefUse = 2; // use of an operand and/or resource + static constexpr Item RefDef = 4; // operand def (resource use) + static constexpr Item RefKill = 8; // kill of an operand + static constexpr Item RefUseDef = 16; // operand use/def (use of operand) + static constexpr Item RefHold = 32; // wait on availability of resource + static constexpr Item RefReserve = 64; // reserve resource until some cycle + static constexpr Item RefFus = 128; // use a functional unit + static constexpr Item RefCond = 256; // conditional reference + + static constexpr Item AnyUse = RefUse | RefUseDef | RefPred; + static constexpr Item AnyDef = RefDef | RefUseDef; +}; +using ReferenceType = ReferenceTypes::Item; + +// The index of an operand into an instruction. +using OperandId = int8_t; // These start at 0, so < 0 means invalid. + +// Reference flags field. Values are powers of 2 so we can combine them. +struct ReferenceFlags { + using Item = int8_t; + static constexpr int kNone = 0; + // Reference flags for operand and resource references. + static constexpr int kProtected = 1; // Reference is hardware-protected. + static constexpr int kUnprotected = 2; // Reference is not h/w protected. + static constexpr int kDuplicate = 4; // Reference is duplicate reference + + // Reference flags for explicit functional unit references. + static constexpr int kUnreserved = 1; // Funcunit is not reserved. + static constexpr int kBuffered = 2; // Funcunit has an issue queue. + static constexpr int kBeginGroup = 4; // Instr must begin issue group. + static constexpr int kEndGroup = 8; // Instr must end issue group. + static constexpr int kSingleIssue = 16; // Instr must issue alone. + static constexpr int kRetireOOO = 32; // Instr may retire out of order. 
+ + static bool is_protected(Item flag) { return flag & kProtected; } + static bool is_unprotected(Item flag) { return flag & kUnprotected; } + static bool is_duplicate(Item flag) { return flag & kDuplicate; } + static bool is_unreserved(Item flag) { return flag & kUnreserved; } + static bool is_buffered(Item flag) { return flag & kBuffered; } + static bool is_begin_group(Item flag) { return flag & kBeginGroup; } + static bool is_end_group(Item flag) { return flag & kEndGroup; } + static bool is_single_issue(Item flag) { return flag & kSingleIssue; } + static bool is_retire_ooo(Item flag) { return flag & kRetireOOO; } +}; +using ReferenceFlag = ReferenceFlags::Item; + +// The index of a reference resource or resource pool. +using ResourceIdType = int16_t; // These start at 0, so < 0 means invalid. + +// CPU-specific id of each resource pool. +using PoolIdType = int8_t; + +// The number of resources in a pool. +using PoolSizeType = int8_t; + +// The number of bits represented by a resource, if shared. +using PoolBitsType = int8_t; // -1 means resource is not shared. + +// An integer that represents a pipeline stage. +using PipePhaseType = int16_t; // These start at 0, so < 0 means invalid. + +// An integer that represents the number of stages a resource is used. +using UseCyclesType = uint16_t; + +// An index into the register class table. +using RegisterClassIndexType = int8_t; // These start at 0, so < 0 is invalid. + +// Definitions of objects in the target database. +class Instr; // MDL abstract description of an instruction +class OperandRef; // A single operand reference +class ResourceRef; // A single resource reference +class PoolDescriptor; // An allocation pool descriptor +class PooledResourceRef; // A pooled resource allocation descriptor +class OperandConstraint; // An operand constraint descriptor +class Subunit; // A subunit descriptor +class CpuInfo; // Information about a single CPU/Subtarget +class CpuTableDef; // A table of all CPUs/Subtargets + +template class ConditionalRef; + +// Some compilers don't allow specialization of a type alias (like this): +// using ConditionalRefPool = class ConditionalRef; +// So we need to do this workaround: +template struct TypeAlias { + using type = ConditionalRef; +}; + +using ConditionalRefOpnd = typename TypeAlias::type; +using ConditionalRefRes = typename TypeAlias::type; +using ConditionalRefPool = typename TypeAlias::type; + +// Function definitions used to evaluate predicates, calculating pipeline +// phases, determining resource pool sizes, and fetching values from +// instructions +using PredFunc = bool (*)(Instr *ins); +using PipeFunc = unsigned (*)(Instr *ins); +using PoolFuncType = int (*)(Instr *ins, int operand_index); +using OpndValueFunc = bool (*)(Instr *ins, int operand_index, int count, + int values[]); + +// A simple wrapper to check the range for calculated resource latencies. +inline int getResourcePhase(PipeFunc Func, Instr *Ins); + +//---------------------------------------------------------------------------- +// We initialize a *LOT* of vectors of objects, which incurs a significant +// runtime overhead when the compiler autoinitialization occurs. So rather than +// use vectors, we use an "InitializationVector" instead, which incurs zero +// overhead**. This is a limited "vector" substitute with limited iteration +// capabilities, but is sufficient for all uses of these objects. 
+//---------------------------------------------------------------------------- +// **Note: If the client type has a constructor, the compiler by default +// generates code to call the constructor, and the translation unit must be +// compiled with optimization to eliminate the code and only produce +// initialized data. Alternatively, we can delete all the constructors so +// that we don't -have- to compile with optimization and still avoid the +// initialization time overhead. +//---------------------------------------------------------------------------- +template class InitializationVector { +public: + unsigned char Size; // Number of entries in the vector. + T *Data; // Pointer to the data. + +public: + class Iterator { + T *Iter; + + public: + T &operator*() const { return *Iter; } + T *operator->() { return Iter; } + Iterator &operator++() { + ++Iter; + return *this; + } + Iterator operator++(int) { + Iterator tmp = *this; + ++(*this); + return tmp; + } + + friend bool operator==(const Iterator &a, const Iterator &b) { + return a.Iter == b.Iter; + } + friend bool operator!=(const Iterator &a, const Iterator &b) { + return a.Iter != b.Iter; + } + Iterator(T *Data) : Iter(Data) {} + }; + Iterator begin() { return Iterator(&Data[0]); } + Iterator end() { return Iterator(&Data[Size]); } + unsigned size() const { return Size; } + T &operator[](int index) { return Data[index]; } +}; + +//----------------------------------------------------------------------------- +// ReferenceIter lets us wrap a vector (or InitializationVector) of +// reference lists (that may include nested predicated sublists) and iterate +// over all the members transparently. +// The client T type must have 2 methods: +// isCond() - returns true if this is a "conditional" reference +// getIfElse() - Return the pointer to a "conditional reference object". +// The associated "conditional reference object" must have 3 methods: +// evalPredicate(Ins) - Evaluate the predicate, return true/false. +// getElseClause() - Return the else clause associated with the reference. +// getRefs() - Return the predicated reference vector pointer. +//----------------------------------------------------------------------------- +// Since references lists can have arbitrarily deeply nested conditionals, the +// "iterator" needs to dynamically keep track of nested conditional reference +// list iterators. We use vectors of input_iterators to implement a stack. +// Since conditional reference lists are the exception, we want the "normal" +// case to run as fast as possible, so we only use the iterator vectors when +// we encounter a conditional reference. +//----------------------------------------------------------------------------- +// Note that these client objects are generally PODs because they are +// auto-initialized by the MDL compiler. +//----------------------------------------------------------------------------- +template class ReferenceIter { +private: + InitializationVector *Refs; // The top-level vector of references. + Instr *Ins; // Instruction to use with predicates. 
+ + struct Iterator { + Instr *Ins; + typename InitializationVector::Iterator Iter, End; + std::vector::Iterator> Iters, Ends; + + Iterator(Instr *Ins, InitializationVector *Refs) + : Ins(Ins), Iter(Refs->begin()), End(Refs->end()) { + advance(); + } + + Iterator(Instr *Ins, typename InitializationVector::Iterator End) + : Ins(Ins), Iter(End), End(End) {} + + T &operator*() const { return *Iter; } + T *operator->() { return Iter; } + + Iterator &operator++() { + ++Iter; + advance(); + return *this; + } + Iterator operator++(int) { + Iterator tmp = *this; + ++(*this); + return tmp; + } + + // When an iterator is incremented, if we've reached the end of the + // vector, we pop the stack of reference-lists (or just return). + void advance() { + // If we've reached the end of a vector, pop it off the stack. + if (Iter == End) { + if (Iters.empty()) + return; + Iter = Iters.back(); + Iters.pop_back(); + End = Ends.back(); + Ends.pop_back(); + ++Iter; + return advance(); + } + + // If the entry is a value, we're done advancing. + auto &Ref = *Iter; + if (!Ref.isCond()) + return; + + // Evaluate predicates until we find a true (or missing) one. + // When we counter a TRUE predicate, push the current list onto the + // reference-list stack, and start iterating over the new one. + for (auto *cond = Ref.getIfElse(); cond; cond = cond->getElseClause()) { + if (cond->evalPredicate(Ins)) { + if (cond->getRefs() == nullptr) + break; + Iters.push_back(Iter); + Iter = cond->getRefs()->begin(); + Ends.push_back(End); + End = cond->getRefs()->end(); + return advance(); + } + } + Iter++; // Advance past a conditional ref with an empty clause. + return advance(); + } + + friend bool operator==(const Iterator &a, const Iterator &b) { + return a.Iter == b.Iter; + } + friend bool operator!=(const Iterator &a, const Iterator &b) { + return a.Iter != b.Iter; + } + }; + +public: + ReferenceIter(InitializationVector *Refs, Instr *Ins) + : Refs(Refs), Ins(Ins) {} + + Iterator begin() { return Iterator(this->Ins, Refs); } + Iterator end() { return Iterator(this->Ins, Refs->end()); } +}; + +// Containers of initialized reference objects. +using OperandRefVec = InitializationVector; +using ResourceRefVec = InitializationVector; +using PooledResourceRefVec = InitializationVector; +using OperandConstraintVec = InitializationVector; + +// A set of subunits for a particular instruction/CPU combination +using SubunitVec = InitializationVector; + +// A mapping of instructions to subunit lists. +using SubunitTable = std::vector; + +//----------------------------------------------------------------------------- +/// A description of a single conditional reference object. +/// Used for operand, resource, and pooled resource references. 
+//----------------------------------------------------------------------------- +template class ConditionalRef { + PredFunc Predicate; // function to evaluate the predicate + InitializationVector *Refs; // conditional refs + ConditionalRef *ElseClause; // optional else clause +public: + ConditionalRef(PredFunc Predicate, InitializationVector *Refs, + ConditionalRef *ElseClause) + : Predicate(Predicate), Refs(Refs), ElseClause(ElseClause) {} + + bool hasPredicate() const { return Predicate != nullptr; } + bool evalPredicate(Instr *ins) const { + return Predicate == nullptr || Predicate(ins); + } + InitializationVector *getRefs() const { return Refs; } + ConditionalRef *getElseClause() const { return ElseClause; } +}; + +//----------------------------------------------------------------------------- +/// A reference to an instruction's operand. +//----------------------------------------------------------------------------- +class OperandRef { + ReferenceType Type; // type of the reference + ReferenceFlag Flags; // protected or unprotected + PipePhaseType Phase; // pipeline phase of the reference + union { + PipeFunc PhaseFunc; // optional pointer to phase function + ConditionalRef *IfElse; // conditional reference descriptor + }; + OperandId OperandIndex; // operand index +public: + // Construct a normal unconditional reference. + OperandRef(ReferenceType Type, ReferenceFlag Flags, PipePhaseType Phase, + PipeFunc PhaseFunc, OperandId OperandIndex) + : Type(Type), Flags(Flags), Phase(Phase), PhaseFunc(PhaseFunc), + OperandIndex(OperandIndex) {} + // Construct a conditional reference. + OperandRef(ConditionalRef *IfElse) + : Type(ReferenceTypes::RefCond), IfElse(IfElse) {} + + ReferenceType getType() const { return Type; } + bool isDef() const { return Type & ReferenceTypes::AnyDef; } + bool isUse() const { return Type & ReferenceTypes::AnyUse; } + bool isCond() const { return Type == ReferenceTypes::RefCond; } + bool isDefaultDef() const { return isDef() && OperandIndex == -1; } + + ReferenceFlag getFlags() const { return Flags; } + bool isProtected() const { return Flags & ReferenceFlags::kProtected; } + bool isUnprotected() const { return Flags & ReferenceFlags::kUnprotected; } + bool isDuplicate() const { return Flags & ReferenceFlags::kDuplicate; } + int getPhase(Instr *Ins) const { return PhaseFunc ? PhaseFunc(Ins) : Phase; } + int getOperandIndex() const { return OperandIndex; } + ConditionalRef *getIfElse() const { return IfElse; } +}; + +//----------------------------------------------------------------------------- +/// A reference to a single resource. +//----------------------------------------------------------------------------- +class ResourceRef { + ReferenceType Type; // type of the reference (def, use, etc) + ReferenceFlag Flags; // protected, unprotected, or duplicate ref + PipePhaseType Phase; // pipeline phase of the reference + PipeFunc PhaseFunc; // optional pointer to phase function + UseCyclesType UseCycles; // number of cycles a resource is "used" + ResourceIdType ResourceId; // the resource we're referencing + union { + OperandId OperandIndex; // operand index for shared resources. + unsigned MicroOps; // number of microops for this resource. 
+ }; + PoolBitsType Width; // how many bits in value (-1 if not shared) + ConditionalRef *IfElse; // conditional reference descriptor +public: + ResourceRef(ReferenceType Type, ReferenceFlag Flags, PipePhaseType Phase, + PipeFunc PhaseFunc, UseCyclesType UseCycles, + ResourceIdType ResourceId, OperandId OperandIndex, + PoolBitsType Width) + : Type(Type), Flags(Flags), Phase(Phase), PhaseFunc(PhaseFunc), + UseCycles(UseCycles), ResourceId(ResourceId), + OperandIndex(OperandIndex), Width(Width) {} + + // Construct a conditional reference. + ResourceRef(ConditionalRef *IfElse) + : Type(ReferenceTypes::RefCond), IfElse(IfElse) {} + + // Construct a fus reference + ResourceRef(ReferenceType Type, ReferenceFlag Flags, UseCyclesType UseCycles, + ResourceIdType ResourceId, int MicroOps) + : Type(Type), Flags(Flags), Phase(0), PhaseFunc(nullptr), + UseCycles(UseCycles), ResourceId(ResourceId), MicroOps(MicroOps) {} + // Construct a micro-ops reference with no functional unit resource. + ResourceRef(ReferenceType Type, ReferenceFlag Flags, int MicroOps) + : Type(Type), Flags(Flags), Phase(0), PhaseFunc(nullptr), UseCycles(0), + ResourceId(-1), MicroOps(MicroOps) {} + + ReferenceType getType() const { return Type; } + ReferenceFlag getFlags() const { return Flags; } + bool isUse() const { return Type == ReferenceTypes::RefUse; } + bool isFus() const { return Type == ReferenceTypes::RefFus; } + bool isCond() const { return Type == ReferenceTypes::RefCond; } + bool isProtected() const { return Flags & ReferenceFlags::kProtected; } + bool isUnprotected() const { return Flags & ReferenceFlags::kUnprotected; } + bool isDuplicate() const { return Flags & ReferenceFlags::kDuplicate; } + + bool isUnreserved() const { return Flags & ReferenceFlags::kUnreserved; } + bool isBuffered() const { return Flags & ReferenceFlags::kBuffered; } + bool isBeginGroup() const { return Flags & ReferenceFlags::kBeginGroup; } + bool isEndGroup() const { return Flags & ReferenceFlags::kEndGroup; } + bool isSingleIssue() const { return Flags & ReferenceFlags::kSingleIssue; } + bool isRetireOOO() const { return Flags & ReferenceFlags::kRetireOOO; } + + int getPhase(Instr *Ins) const { + return PhaseFunc ? getResourcePhase(PhaseFunc, Ins) : Phase; + } + int getCycles() const { return UseCycles; } + int getResourceId() const { return ResourceId; } + bool hasResourceId() const { return ResourceId != -1; } + int getMicroOps() const { return MicroOps; } + int getOperandIndex() const { return OperandIndex; } + int getWidth() const { return Width; } + bool isValidOperandIndex() const { return OperandIndex >= 0; } + bool isShared() const { return Width > 0; } + ConditionalRef *getIfElse() const { return IfElse; } +}; + +//----------------------------------------------------------------------------- +/// A descriptor of a single resource pool. 
+//----------------------------------------------------------------------------- +class PoolDescriptor { + PoolIdType PoolId; // base pool id for this subpool + PoolIdType PoolSize; // how many different allocation sizes in pool + PoolSizeType Count; // number of entries needed + PoolFuncType PoolFunc; // optional pointer to pool count func + OpndValueFunc ValueFunc; // optional pointer to fetch operand values + ResourceIdType First; // index of first legal id + ResourceIdType Last; // index of last legal id + PoolBitsType Width; // how many bits in value (-1 if not shared) +public: + PoolDescriptor(PoolIdType PoolId, PoolIdType PoolSize, PoolSizeType Count, + PoolFuncType PoolFunc, OpndValueFunc ValueFunc, + ResourceIdType First, ResourceIdType Last, PoolBitsType Width) + : PoolId(PoolId), PoolSize(PoolSize), Count(Count), PoolFunc(PoolFunc), + ValueFunc(ValueFunc), First(First), Last(Last), Width(Width) {} + + // Return the number of individual resources needed. This is either a + // constant value, or we can call a function to determine it based on + // the instruction instance. + int getCount(Instr *Inst, int OperandId) const { + return PoolFunc ? PoolFunc(Inst, OperandId) : Count; + } + + // Fetch operand values from an instruction, used to facilitate + // sharing resources of shared values. + bool getValues(Instr *Inst, int OperandId, int Count, int Values[]) { + return ValueFunc ? ValueFunc(Inst, OperandId, Count, Values) : false; + } + bool hasValueFunc() const { return ValueFunc != nullptr; } + int getFirst() const { return First; } + int getLast() const { return Last; } + int getSize() const { return Last - First + 1; } + int getWidth() const { return Width; } + int getPoolSize() const { return PoolSize; } + int getPoolId() const { return PoolId; } + int isShared() const { return Width > 0; } +}; + +//----------------------------------------------------------------------------- +/// A reference to a resource pool. +//----------------------------------------------------------------------------- +class PooledResourceRef { + ReferenceType Type; // type of the reference + ReferenceFlag Flags; // protected, or unprotected + PipePhaseType Phase; // pipeline phase of the reference + PipeFunc PhaseFunc; // optional pointer to phase function + UseCyclesType Cycles; // number of cycles resource is used + ResourceIdType *ResourceIds; // the resources we're referencing + OperandId OperandIndex; // operand index for shared resources + int MicroOps = 0; // number of microops for an Fus entry + union { + PoolDescriptor *Pool; // pointer to pool descriptor object + ConditionalRef *IfElse; // conditional ref descriptor + }; + +public: + PooledResourceRef(ReferenceType Type, ReferenceFlag Flags, + PipePhaseType Phase, PipeFunc PipeFunc, + UseCyclesType Cycles, ResourceIdType *ResourceIds, + OperandId OperandIndex, PoolDescriptor *Pool) + : Type(Type), Flags(Flags), Phase(Phase), PhaseFunc(PipeFunc), + Cycles(Cycles), ResourceIds(ResourceIds), OperandIndex(OperandIndex), + Pool(Pool) {} + // Construct a conditional reference. + PooledResourceRef(ConditionalRef *IfElse) + : Type(ReferenceTypes::RefCond), IfElse(IfElse) {} + // Constructor for a pooled functional unit reference. 
+ PooledResourceRef(ReferenceType Type, ReferenceFlag Flags, + UseCyclesType Cycles, ResourceIdType *ResourceIds, + PoolDescriptor *Pool, int MicroOps) + : Type(Type), Flags(Flags), Phase(0), PhaseFunc(nullptr), Cycles(Cycles), + ResourceIds(ResourceIds), OperandIndex(0), MicroOps(MicroOps), + Pool(Pool) {} + + ReferenceType getType() const { return Type; } + ReferenceFlag getFlags() const { return Flags; } + bool isUse() const { return Type == ReferenceTypes::RefUse; } + bool isFus() const { return Type == ReferenceTypes::RefFus; } + bool isCond() const { return Type == ReferenceTypes::RefCond; } + bool IsProtected() const { return Flags & ReferenceFlags::kProtected; } + bool IsUnprotected() const { return Flags & ReferenceFlags::kUnprotected; } + bool IsDuplicate() const { return Flags & ReferenceFlags::kDuplicate; } + + bool isUnreserved() const { return Flags & ReferenceFlags::kUnreserved; } + bool isBuffered() const { return Flags & ReferenceFlags::kBuffered; } + bool isBeginGroup() const { return Flags & ReferenceFlags::kBeginGroup; } + bool isEndGroup() const { return Flags & ReferenceFlags::kEndGroup; } + bool isSingleIssue() const { return Flags & ReferenceFlags::kSingleIssue; } + bool isRetireOOO() const { return Flags & ReferenceFlags::kRetireOOO; } + + int getPhase(Instr *Ins) const { + return PhaseFunc ? getResourcePhase(PhaseFunc, Ins) : Phase; + } + unsigned getCycles() const { return Cycles; } + ResourceIdType *getResourceIds() const { return ResourceIds; } + int getOperandIndex() const { return OperandIndex; } + int getMicroOps() const { return MicroOps; } + PoolDescriptor *getPool() const { return Pool; } + int getPoolId() const { return Pool->getPoolId(); } + int getPoolSize() const { return Pool->getPoolSize(); } + int getCount(Instr *Inst, int OperandId) const { + return Pool->getCount(Inst, OperandId); + } + int getFirst() const { return Pool->getFirst(); } + int getLast() const { return Pool->getLast(); } + int getSize() const { return Pool->getSize(); } + int getWidth() const { return Pool->getWidth(); } + bool isShared() const { return Pool->isShared(); } + ConditionalRef *getIfElse() const { return IfElse; } +}; + +/// A register constraint on a single operand. +class OperandConstraint { + OperandId OperandIndex; + RegisterClassIndexType ClassIndex; + ConditionalRef *IfElse; // conditional constraint +public: + OperandConstraint(OperandId OperandIndex, RegisterClassIndexType ClassIndex) + : OperandIndex(OperandIndex), ClassIndex(ClassIndex), IfElse(nullptr) {} + // Construct a conditional reference. + OperandConstraint(ConditionalRef *IfElse) + : IfElse(IfElse) {} + + int getOperandIndex() const { return OperandIndex; } + int getClassIndex() const { return ClassIndex; } + + ConditionalRef *getIfElse() const { return IfElse; } + bool isCond() { return IfElse != nullptr; } +}; + +/// A single subunit definition. A subunit completely describes the register +/// and resource behavior of the instance of an instruction (or a set of +/// instructions). 
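The Subunit class defined next ties these reference lists together. As an illustration of the pool machinery above, here is a small sketch, not part of the patch, showing a hand-built PoolDescriptor and the queries an allocator would make against it; every constant in it is invented for the example, and the operand id is a placeholder since no PoolFunc is supplied.

```
// Sketch only: a hypothetical pool of four interchangeable units (resource
// ids 32..35), one unit per request, with no value sharing (Width = -1).
void poolDescriptorExample(Instr *Ins) {
  PoolDescriptor ExamplePool(/*PoolId=*/0, /*PoolSize=*/1, /*Count=*/1,
                             /*PoolFunc=*/nullptr, /*ValueFunc=*/nullptr,
                             /*First=*/32, /*Last=*/35, /*Width=*/-1);
  int Needed = ExamplePool.getCount(Ins, /*OperandId=*/-1); // 1: no PoolFunc
  int Candidates = ExamplePool.getSize();                   // 4 legal ids
  bool Shared = ExamplePool.isShared();                     // false
  (void)Needed; (void)Candidates; (void)Shared;
}
```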
+class Subunit { + OperandRefVec *OperandReferences = nullptr; + ResourceRefVec *UsedResourceReferences = nullptr; + ResourceRefVec *HeldResourceReferences = nullptr; + ResourceRefVec *ReservedResourceReferences = nullptr; + PooledResourceRefVec *PooledResourceReferences = nullptr; + OperandConstraintVec *Constraints = nullptr; + +public: + Subunit(OperandRefVec *OperandReferences, + ResourceRefVec *UsedResourceReferences, + ResourceRefVec *HeldResourceReferences, + ResourceRefVec *ReservedResourceReferences, + PooledResourceRefVec *PooledResourceReferences, + OperandConstraintVec *Constraints) + : OperandReferences(OperandReferences), + UsedResourceReferences(UsedResourceReferences), + HeldResourceReferences(HeldResourceReferences), + ReservedResourceReferences(ReservedResourceReferences), + PooledResourceReferences(PooledResourceReferences), + Constraints(Constraints) {} + // Simpler constructor for the common case of empty parameters. + Subunit(OperandRefVec *OperandReferences, + ResourceRefVec *UsedResourceReferences) + : OperandReferences(OperandReferences), + UsedResourceReferences(UsedResourceReferences) {} + + OperandRefVec *getOperandReferences() const { return OperandReferences; } + ResourceRefVec *getUsedResourceReferences() const { + return UsedResourceReferences; + } + ResourceRefVec *getHeldResourceReferences() const { + return HeldResourceReferences; + } + ResourceRefVec *getReservedResourceReferences() const { + return ReservedResourceReferences; + } + PooledResourceRefVec *getPooledResourceReferences() const { + return PooledResourceReferences; + } + OperandConstraintVec *getConstraints() const { return Constraints; } +}; + +// CPU configuration parameters, determined by the MDL compiler, based on the +// machine description. This is used to specialize CpuInfo methods for +// bundle packing and scheduling. +template +struct CpuParams { + static const int MaxResourceId = MRI; // maximum resource id + static const int MaxUsedResourceId = MURI; // maximum "used" resource + static const int MaxFuncUnitId = MFUI; // maximum functional unit id + static const int PoolCount = PC; // number of pools defined + static const int MaxPoolAllocation = MPA; // biggest pool allocation + static const int MaxIssue = MI; // maximum parallel issue + static const int ReorderBufferSize = RBS; // instr reorder buffer size + static const int EarlyUsePhase = EUP; // earliest operand use phase + static const int LoadPhase = LP; // default phase for loads + static const int HighLatencyDefPhase = HLDP; // high latency def phase + static const int MaxResourcePhase = MRP; // latest resource "use" phase +}; + +// An abstract type that describes the interface to a CPU-specific resource +// reservation table. +class Reservations { +public: + Reservations(){}; + virtual ~Reservations() = default; + virtual Reservations *allocate() { return nullptr; } + virtual Reservations *allocate(unsigned II) { return nullptr; } + virtual void advance() {} + virtual void recede() {} + virtual void merge(Reservations *input) {} + virtual Reservations *clone() { return nullptr; } + virtual void reset() = 0; + virtual void setCycle(unsigned InsertCycle) {} + + static constexpr unsigned power_of_2(unsigned number) { + unsigned result = 1; + while (number > result) + result <<= 1; + return result; + } +}; + +/// A 2D bitset representing resources used by a window of instructions over +/// the pipeline phases of an instruction execution. 
This does not necessarily +/// include the entire pipeline, or all declared resources, but just the +/// resources and phases representing issue, pool, and hazard resources +/// (determined by the mdl compiler). +/// In addition to testing, setting, and removing members, you can also +/// "advance" the pipeline forward and backward in time and OR two sets. +/// For efficiency, this object is implemented as a power-of-2-sized circular +/// buffer of bitsets. Since its specialized for each CPU, the sizes are all +/// constant, so everything is statically allocated. +template class ReservationsConfig : public Reservations { + static constexpr unsigned Size = power_of_2(CpuParams::MaxResourcePhase + 1); + unsigned Head = 0; + using Resources = std::bitset; + Resources Bits[Size]; + + Resources &Item(unsigned Phase) { return Bits[(Head + Phase) & (Size - 1)]; } + +public: + ReservationsConfig() { reset(); } + Reservations *allocate() override { + return static_cast(new ReservationsConfig()); + } + Reservations *clone() override { + return static_cast(new ReservationsConfig(*this)); + } + + void reset() override { + for (unsigned i = 0; i < Size; i++) + Bits[i].reset(); + Head = 0; + } + + void advance() override { + Bits[Head].reset(); + Head = (Head + 1) & (Size - 1); + } + void recede() override { + Head = (Head - 1) & (Size - 1); + Bits[Head].reset(); + } + + void set(int Bit, unsigned Phase) { Item(Phase).set(Bit); } + void clr(int Bit, unsigned Phase) { Item(Phase).reset(Bit); } + bool test(int Bit, unsigned Phase) { return Item(Phase).test(Bit); } + bool testSet(int Bit, unsigned Phase) { + if (test(Bit, Phase)) + return true; + set(Bit, Phase); + return false; + } + + void set(int Bit, unsigned Phase, unsigned Cycles) { + for (unsigned i = 0; i < Cycles; i++) + set(Bit, Phase + i); + } + void clr(int Bit, unsigned Phase, unsigned Cycles) { + for (unsigned i = 0; i < Cycles; i++) + clr(Bit, Phase + i); + } + bool test(int Bit, unsigned Phase, unsigned Cycles) { + for (unsigned i = 0; i < Cycles; i++) + if (test(Bit, Phase + i)) + return true; + return false; + } + bool testSet(int Bit, unsigned Phase, unsigned Cycles) { + if (test(Bit, Phase, Cycles)) + return true; + set(Bit, Phase, Cycles); + return false; + } + + void merge(Reservations *input) override { + auto *my_input = static_cast *>(input); + for (unsigned i = 0; i < Size; i++) + Item(i) |= my_input->Item(i); + } + + // Return the count of resources used in a set of phases. + unsigned popl(unsigned Early, unsigned Late) { + unsigned Count = 0; + for (unsigned time = Early; time <= Late; time++) + Count += Item(time).count(); + return Count; + } +}; + +/// A 2D bitset representing a modulo resource table for software pipelining. +/// Like ReservationsConfig, this only needs to model resources used +/// for issue, slot allocation, pools, and hazards. +/// Unlike ReservationsConfig, these objects have a "current cycle" that +/// resource references occur in. Since the reservation table needs to model +/// the entire loop, there's no method for "advancing" or "receding" the +/// pipeline mode, but clients do need to set the insertion when attempting +/// to schedule an instruction. +/// Note: the II can be larger than the MDL-generated maximum pipeline depth. +template +class ModuloReservationsConfig : public Reservations { + unsigned int II = 0; // The II we're attempting to schedule at. + unsigned int Cycle = 0; // Current cycle to insert at. 
+ using Resources = std::bitset; + Resources *Bits; + + Resources &Item(unsigned Phase) { return Bits[(Cycle + Phase) % II]; } + +public: + ModuloReservationsConfig(unsigned II) : II(II), Bits(new Resources[II]) {} + + ~ModuloReservationsConfig() { delete[] Bits; } + + Reservations *allocate(unsigned II) override { + return static_cast(new ModuloReservationsConfig(II)); + } + + void setCycle(unsigned InsertCycle) override { Cycle = InsertCycle; } + void reset() override { + for (unsigned i = 0; i < II; i++) + Bits[i].reset(); + } + + void set(int Bit, unsigned Phase) { Item(Phase).set(Bit); } + void clr(int Bit, unsigned Phase) { Item(Phase).reset(Bit); } + bool test(int Bit, unsigned Phase) { return Item(Phase).test(Bit); } + bool testSet(int Bit, unsigned Phase) { + if (test(Bit, Phase)) + return true; + set(Bit, Phase); + return false; + } + + void set(int Bit, unsigned Phase, unsigned Cycles) { + for (unsigned i = 0; i < Cycles; i++) + set(Bit, Phase + i); + } + void clr(int Bit, unsigned Phase, unsigned Cycles) { + for (unsigned i = 0; i < Cycles; i++) + clr(Bit, Phase + i); + } + bool test(int Bit, unsigned Phase, unsigned Cycles) { + for (unsigned i = 0; i < Cycles; i++) + if (test(Bit, Phase + i)) + return true; + return false; + } + bool testSet(int Bit, unsigned Phase, unsigned Cycles) { + if (test(Bit, Phase, Cycles)) + return true; + set(Bit, Phase, Cycles); + return false; + } +}; + +///---------------------------------------------------------------------------- +/// Abstract interface to an llvm instruction. This object provides +/// a common interface to the MDL compiler for accessing information in +/// EITHER MachineInstrs and MCInsts. +///---------------------------------------------------------------------------- +class Instr { + // Descriptors for MachineInst records. + const MachineInstr *MI = nullptr; + const TargetInstrInfo *TII = nullptr; + + // Descriptors for MCInstr records. + const MCInst *MC = nullptr; + const MCSubtargetInfo *STI = nullptr; + const MCInstrInfo *MCII = nullptr; + + CpuInfo *Cpu = nullptr; + +public: + Instr(const MachineInstr *MI, const TargetSubtargetInfo *STI); + Instr(const MachineInstr *MI, const TargetInstrInfo *TII, CpuInfo *Cpu); + Instr(const MCInst *MC, const MCSubtargetInfo *STI, const MCInstrInfo *MCII); + + const MachineInstr *getMI() const { return MI; } + const TargetInstrInfo *getTII() const { return TII; } + + const MCInst *getMC() const { return MC; } + const MCSubtargetInfo *getSTI() const { return STI; } + const MCInstrInfo *getMCII() const { return MCII; } + CpuInfo *getCpuInfo() { return Cpu; } + + bool isMC() const { return MC != nullptr; } + bool isMI() const { return MI != nullptr; } + + // Get the LLVM name for this instruction. + std::string getName(); + /// Fetch the instruction's opcode. + int getOpcode(); + + /// Evaluate a Target-library instruction predicate for this instruction. + bool evaluatePredicate(int PredId); + + /// Return the raw bits associated with an operand. + int64_t getOperand(int OperandIndex); + /// Return various attributes of an instruction's operand. + bool isOpndLiteral(int OperandIndex); + bool isOpndAddress(int OperandIndex); + bool isOpndLabel(int OperandIndex); + bool isOpndRegister(int OperandIndex); + bool isOpndVirtualRegister(int OperandIndex); + + /// Return true if a MachineInstr has more operands than described in its + /// MCInst description. + bool hasExtraOperands(); + + /// Fetch the instruction's currently assigned subunit. 
TODO: We don't have
+  /// a way to record the selected subunit in an instruction, so for now just
+  /// return 0 (ie, the first subunit).
+  int getSubunitId() { return 0; }
+
+  /// Return the set of subunits for an instruction and CPU combination.
+  SubunitVec *getSubunit();
+};
+
+///----------------------------------------------------------------------------
+/// MDL-based Bundle Packer definitions. This provides object definitions that
+/// are needed for the bundle packing implementation. Since the implementation
+/// is specialized for each CPU, we need to define these separately from the
+/// implementation.
+///----------------------------------------------------------------------------
+class SlotDesc;
+class PoolRequest;
+
+/// A shared resource contains a resource id, an operand id, a value, and a
+/// resource count. A SharedResourceSet is all the resource allocations for
+/// a single slot.
+using SharedResource = std::tuple;
+using SharedResourceSet = std::vector;
+
+using SlotSet = std::vector;
+using InstrSet = std::vector;
+using MCInstrSet = std::vector;
+using PoolRequestSet = std::vector;
+
+/// When we attempt to bundle an instruction, there are three possible
+/// (internal) outcomes. Either we succeed in the bundling, or we fail to
+/// find a valid bundle, or we fail to allocate pooled resources.
+enum class BundleStatus {
+  kSuccess,           // Bundling and Resource Allocation succeeded.
+  kBundleFailed,      // Bundling failed.
+  kAllocationFailed,  // Bundling worked, Resource Allocation failed.
+};
+
+/// A working set of values allocated to resources, used by bundling to
+/// allocate shared resources in a bundle.
+template class ResourceValues {
+  int Values[CpuParams::MaxUsedResourceId + 1];
+  bool Valid[CpuParams::MaxUsedResourceId + 1] = {false};
+
+public:
+  bool check(int resource_id, int Vals[], int count) {
+    for (int id = 0; id < count; id++, resource_id++)
+      if (!Valid[resource_id] || Values[resource_id] != Vals[id])
+        return false;
+    return true;
+  }
+  void set(int ResourceId, int Vals[], int Count) {
+    for (int id = 0; id < Count; id++, ResourceId++) {
+      Valid[ResourceId] = true;
+      Values[ResourceId] = Vals[id];
+    }
+  }
+};
+
+/// Representation of a single issue slot. A slot contains the entire context
+/// of how an instruction is bundled: the instruction itself, all the subunits
+/// it qualifies for, the selected subunit id, and the resources assigned to
+/// the instruction in the current bundle.
+class SlotDesc {
+  Instr Inst;                    // instruction description
+  SubunitVec *Subunits;          // pointer to vector of legal subunits
+  int SubunitId;                 // currently selected subunit id
+  SharedResourceSet Resources;   // resources reserved for instruction
+public:
+  SlotDesc(const MCInst *MC, const MCSubtargetInfo *STI,
+           const MCInstrInfo *MCII);
+  SlotDesc(MachineInstr *MI, const TargetSubtargetInfo *STI);
+
+  Instr *getInst() { return &Inst; }
+  const MachineInstr *getMI() const { return Inst.getMI(); }
+  SubunitVec *getSubunits() const { return Subunits; }
+  int getSubunitId() const { return SubunitId; }
+  void setSubunitId(int Id) { SubunitId = Id; }
+
+  Subunit *getSubunit() const { return &(*Subunits)[SubunitId]; }
+  SharedResourceSet &getResources() { return Resources; }
+  void setResources(const SharedResourceSet &Res) { Resources = Res; }
+};
+
+/// Specify a single pool request for a candidate instruction. This object
+/// is internal to the bundle packer, and is used to fulfill instructions'
+/// pooled allocation requests. 
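The PoolRequest class that follows is internal plumbing; the external surface a scheduler sees is the CpuInfo bundling interface declared further down in this header. The sketch below shows one plausible client loop for a single issue cycle. It is illustrative only: "Ready" is a hypothetical candidate list, the Reservations object is assumed to have been obtained earlier from CpuInfo::allocReservations(), and getCpuInfo() is the subtarget accessor used elsewhere in this patch.

```
// Sketch only: pack one issue cycle from a hypothetical ready list.
void packOneCycle(const std::vector<MachineInstr *> &Ready,
                  const TargetSubtargetInfo *STI, Reservations &Res) {
  CpuInfo *Cpu = STI->getCpuInfo();            // null if there is no MDL model
  if (!Cpu)
    return;
  SlotSet Bundle;
  for (MachineInstr *MI : Ready) {
    SlotDesc Candidate(MI, STI);
    Cpu->addToBundle(Bundle, Candidate, Res);  // adds MI only if it fits
  }
  Cpu->addBundleToReservation(Bundle, Res);    // record the packed bundle
  Res.advance();                               // step to the next cycle
}
```

For a modulo scheduler, allocModuloReservations(II) together with Reservations::setCycle() would play the role of the per-cycle advance.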
+class PoolRequest { + SlotDesc *WhichSlot; // Slot/instruction making request + PooledResourceRef *Ref; // the pooled resource request + int Count; // how many resources requested + int Phase; // what pipeline phase +public: + PoolRequest(SlotDesc *WhichSlot, PooledResourceRef *Ref) + : WhichSlot(WhichSlot), Ref(Ref) { + Count = Ref->getCount(WhichSlot->getInst(), Ref->getOperandIndex()); + Phase = Ref->getPhase(WhichSlot->getInst()); + } + + int getFirst() const { return Ref->getFirst(); } + int getLast() const { return Ref->getLast(); } + int getSize() const { return Ref->getSize(); } + int getCount() const { return Count; } + int getWidth() const { return Ref->getWidth(); } + int getPhase() const { return Phase; } + int getCycles() const { return Ref->getCycles(); } + ResourceIdType *getResourceIds() const { return Ref->getResourceIds(); } + int getPoolId() const { return Ref->getPoolId(); } + int getSubpoolId() const { + return getPoolId() + Ref->getPool()->getPoolSize() - getCount(); + } + Instr *getInst() const { return WhichSlot->getInst(); } + int getOperandId() const { return Ref->getOperandIndex(); } + PooledResourceRef *getRef() const { return Ref; } + SlotDesc *getSlot() const { return WhichSlot; } + bool isShared() const { return getRef()->isShared(); } +}; + +/// Collection of all pool requests for a set of candidate instructions, +/// organized by pool id. +template class PoolRequests { + PoolRequestSet Pools[CpuParams::PoolCount ? CpuParams::PoolCount : 1]; + +public: + PoolRequests() : Pools() {} + auto &getPool(int Index) { return Pools[Index]; } + void AddPoolRequest(SlotDesc *WhichSlot, PooledResourceRef *Item) { + PoolRequest request(WhichSlot, Item); + if (request.getCount() != 0) + Pools[request.getSubpoolId()].push_back(request); + } +}; + +///---------------------------------------------------------------------------- +/// Information for each defined CPU. Each MDL CPU corresponds to a single +/// LLVM target or, roughly, a single SchedMachineModel. CpuInfo contains +/// instruction behaviors specific to that SchedMachineModel, as well as +/// forwarding information and some "worst-case" instruction behaviors. +///---------------------------------------------------------------------------- +class CpuInfo { + unsigned MaxResourceId = 0; // maximum resource id + unsigned MaxUsedResourceId = 0; // maximum "used" resource + unsigned MaxFuncUnitId = 0; // max functional unit resource id + unsigned PoolCount = 0; // number of pools defined + unsigned MaxPoolAllocation = 0; // max resources alloced for a pool + unsigned MaxIssue = 0; // maximum parallel issue + unsigned ReorderBufferSize = 0; // instruction reorder buffer size + unsigned EarlyUsePhase = 0; // earliest phase of operand uses + unsigned LoadPhase = 0; // default phase for load instructions + unsigned HighLatencyDefPhase = 0; // high latency def instruction phase + unsigned MaxResourcePhase = 0; // latest resource "use" phase + SubunitTable *(*InitSubunitTable)() = nullptr; + int8_t **ForwardTable = nullptr; // forwarding info table, or null + SubunitTable *Subunits = nullptr; // instruction-to-subunit mapping + unsigned ResourceFactor = 1; // Cpu-specific resource factor + + // A CPU can have a set of Target-library predicates, which are only used + // if the LLVM Target library is included in an application. This vector is + // generated by the MDL compiler, and is initialized here when the Subtarget + // object is initialized. 
+ std::vector *InstrPredicates = nullptr; + +public: + CpuInfo(unsigned MaxResourceId, unsigned MaxUsedResourceId, + unsigned MaxFuncUnitId, unsigned PoolCount, + unsigned MaxPoolAllocation, unsigned MaxIssue, + unsigned ReorderBufferSize, unsigned EarlyUsePhase, + unsigned LoadPhase, unsigned HighLatencyDefPhase, + unsigned MaxResourcePhase, SubunitTable *(*InitSubunitTable)(), + int8_t **ForwardTable, unsigned ResourceFactor) + : MaxResourceId(MaxResourceId), MaxUsedResourceId(MaxUsedResourceId), + MaxFuncUnitId(MaxFuncUnitId), PoolCount(PoolCount), + MaxPoolAllocation(MaxPoolAllocation), MaxIssue(MaxIssue), + ReorderBufferSize(ReorderBufferSize), EarlyUsePhase(EarlyUsePhase), + LoadPhase(LoadPhase), HighLatencyDefPhase(HighLatencyDefPhase), + MaxResourcePhase(MaxResourcePhase), InitSubunitTable(InitSubunitTable), + ForwardTable(ForwardTable), ResourceFactor(ResourceFactor) {} + CpuInfo() {} + virtual ~CpuInfo() = default; + + //------------------------------------------------------------------------ + // These functions return all the top-level attributes of the CPU. + //------------------------------------------------------------------------ + unsigned getMaxResourceId() const { return MaxResourceId; } + unsigned getMaxUsedResourceId() const { return MaxUsedResourceId; } + unsigned getMaxFuncUnitId() const { return MaxFuncUnitId; } + bool isFuncUnitId(int id) const { return (unsigned)id <= MaxFuncUnitId; } + + unsigned getPoolCount() const { return PoolCount; } + unsigned getMaxPoolAllocation() const { return MaxPoolAllocation; } + + unsigned getMaxIssue() const { return MaxIssue; } + unsigned getReorderBufferSize() const { return ReorderBufferSize; } + + unsigned getEarlyUsePhase() const { return EarlyUsePhase; } + unsigned getLoadPhase() const { return LoadPhase; } + unsigned getHighLatencyDefPhase() const { return HighLatencyDefPhase; } + unsigned getMaxResourcePhase() const { return MaxResourcePhase; } + int8_t **getForwardTable() const { return ForwardTable; } + unsigned getResourceFactor() const { return ResourceFactor; } + + //------------------------------------------------------------------------ + // Functions for managing the subunit and predicate tables. + //------------------------------------------------------------------------ + SubunitTable *getSubunits() const { return Subunits; } + SubunitVec *getSubunit(int opcode) const { return (*Subunits)[opcode]; } + bool IsInstruction(int Opcode, int OperandId) const { + if (OperandId == -1) + return false; + return getSubunit(Opcode) != nullptr; + } + + // A subunit table is only initialized once, when it is selected for use. + // Call the MDL-generated function to initialize it. + void InitSubunits() { + if (Subunits == nullptr) + Subunits = InitSubunitTable(); + } + + // Register a set of Subtarget-specific predicates for this subtarget. + void SetInstrPredicates(std::vector *Preds) { + InstrPredicates = Preds; + } + + // Optionally evaluate a Subtarget-specific predicate function (generated + // by the MDL compiler). + bool evaluatePredicate(int Index, Instr *MI) { + if (InstrPredicates == nullptr) + return false; + return (*InstrPredicates)[Index](MI); + } + + //------------------------------------------------------------------------ + // These functions look for various attributes on explicit functional unit + // references. Note that these reference lists typically have only a single + // entry, so this should be very fast. 
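The attribute queries that follow (mustBeginGroup and friends) all lean on the subunit table managed just above. As a reminder of that plumbing, here is a minimal sketch, not part of the patch, of how a client would force the table to exist and fetch the candidates for one opcode; "SomeOpcode" is a placeholder value.

```
// Sketch only: build the subunit table on demand and inspect one opcode's
// candidates, mirroring the pattern the helpers below use via Instr.
void inspectSubunits(CpuInfo &Cpu, int SomeOpcode) {
  Cpu.InitSubunits();                        // lazily built on first use
  if (SubunitVec *Candidates = Cpu.getSubunit(SomeOpcode)) {
    // Each entry is one legal placement (functional unit, issue slot,
    // resources); clients usually scan its used-resource references.
    auto *Refs = (*Candidates)[0].getUsedResourceReferences();
    (void)Refs;
  }
}
```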
+ //------------------------------------------------------------------------ + // Return true if an instruction must begin an issue group. + bool mustBeginGroup(const MachineInstr *MI, const TargetSubtargetInfo *STI) { + Instr Ins(MI, STI); + if (auto *Subunit = Ins.getSubunit()) { + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus() && Ref.isBeginGroup()) + return true; + } + return false; + } + + // Return true if an instruction must end an issue group. + bool mustEndGroup(const MachineInstr *MI, const TargetSubtargetInfo *STI) { + Instr Ins(MI, STI); + if (auto *Subunit = Ins.getSubunit()) { + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus() && Ref.isEndGroup()) + return true; + } + return false; + } + + // Return true if an instruction must be single-issued. + bool isSingleIssue(const MachineInstr *MI, const TargetSubtargetInfo *STI) { + Instr Ins(MI, STI); + if (auto *Subunit = Ins.getSubunit()) { + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus() && Ref.isSingleIssue()) + return true; + } + return false; + } + + // Return true if an instruction has the RetireOOO attribute. + bool isRetireOOO(const MachineInstr *MI, const TargetSubtargetInfo *STI) { + Instr Ins(MI, STI); + if (auto *Subunit = Ins.getSubunit()) { + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus() && Ref.isRetireOOO()) + return true; + } + return false; + } + + //------------------------------------------------------------------------ + // Return the total number of micro-ops for an instruction. + //------------------------------------------------------------------------ + int numMicroOps(Instr Ins) const { + int MicroOps = 0; + if (auto *Subunit = Ins.getSubunit()) { + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus()) + MicroOps += Ref.getMicroOps(); + } + return MicroOps; + } + + int numMicroOps(const MachineInstr *MI, const TargetSubtargetInfo *STI) { + return numMicroOps(Instr(MI, STI)); + } + int numMicroOps(const MCInst *MI, const MCSubtargetInfo *STI, + const MCInstrInfo *MCII) { + return numMicroOps(Instr(MI, STI, MCII)); + } + + //------------------------------------------------------------------------ + // Calculate the reciprocal throughput for an instruction. + //------------------------------------------------------------------------ + double getReciprocalThroughput(Instr Ins) const { + double Throughput = 0.0; + if (auto *Subunit = Ins.getSubunit()) { + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus() && Ref.getCycles()) { + double Temp = 1.0 / Ref.getCycles(); + Throughput = Throughput ? std::min(Throughput, Temp) : Temp; + } + if (auto *Prefs = (*Subunit)[0].getPooledResourceReferences()) + for (const auto &Ref : ReferenceIter(Prefs, &Ins)) + if (Ref.isFus()) { // Pools always have non-zero cycles + double Temp = (Ref.getSize() * 1.0) / Ref.getCycles(); + Throughput = Throughput ? 
std::min(Throughput, Temp) : Temp; + } + } + if (Throughput != 0.0) + return 1.0 / Throughput; + return (numMicroOps(Ins) * 1.0) / getMaxIssue(); + } + + double getReciprocalThroughput(const TargetSubtargetInfo *STI, + const MachineInstr *MI) { + return getReciprocalThroughput(Instr(MI, STI)); + } + double getReciprocalThroughput(const MCSubtargetInfo *STI, + const MCInstrInfo *MCII, const MCInst *MI) { + return getReciprocalThroughput(Instr(MI, STI, MCII)); + } + + //------------------------------------------------------------------------ + // Abstract interface to bundle packing infrastructure. + //------------------------------------------------------------------------ + virtual bool addToBundle(SlotSet &Bundle, const SlotDesc &Candidate, + const Reservations &Res) = 0; + virtual bool canAddToBundle(SlotSet &Bundle, const SlotDesc &Candidate, + const Reservations &Res) = 0; + virtual void addBundleToReservation(SlotSet &Bundle, Reservations &Res) = 0; + virtual void deleteBundleFromReservation(SlotSet &Bundle, Reservations &Res) { + } + virtual SlotSet bundleCandidates(const SlotSet *Candidates) = 0; + virtual Reservations *allocReservations() const = 0; + virtual Reservations *allocModuloReservations(int II) const = 0; + + // Bundle packing debug functions. + void dumpBundle(std::string Cpu, std::string Msg, SlotSet &Bundle); + void dumpBundle(SlotSet &Bundle) { dumpBundle("", "", Bundle); } + std::string dumpSlot(std::string Msg, SlotDesc &Slot); + virtual bool validateBundle(std::string Cpu, SlotSet &Bundle) = 0; +}; + +///---------------------------------------------------------------------------- +/// CPU-specific object that describes parameters of the target. +/// The primary role of this object is to provide a bundle packing API that +/// is specialized for each target and subtarget. +///---------------------------------------------------------------------------- +template class CpuConfig : public CpuInfo { +public: + CpuConfig(SubunitTable *(*InitSubunitTable)(), int8_t **ForwardTable, + unsigned ResourceFactor) + : CpuInfo(CpuParams::MaxResourceId, CpuParams::MaxUsedResourceId, + CpuParams::MaxFuncUnitId, CpuParams::PoolCount, + CpuParams::MaxPoolAllocation, CpuParams::MaxIssue, + CpuParams::ReorderBufferSize, CpuParams::EarlyUsePhase, + CpuParams::LoadPhase, CpuParams::HighLatencyDefPhase, + CpuParams::MaxResourcePhase, InitSubunitTable, ForwardTable, + ResourceFactor) {} + + // CPU-specialized bundle packing functions. + bool addToBundle(SlotSet &Bundle, const SlotDesc &Candidate, + const Reservations &Res) override; + bool canAddToBundle(SlotSet &Bundle, const SlotDesc &Candidate, + const Reservations &Res) override; + SlotSet bundleCandidates(const SlotSet *Candidates) override; + Reservations *allocReservations() const override { + return new ReservationsConfig; + } + Reservations *allocModuloReservations(int II) const override { + return new ModuloReservationsConfig(II); + } + void addBundleToReservation(SlotSet &Bundle, Reservations &Res) override; + + // Internal functions to help with bundle packing. + BundleStatus attemptToBundle(SlotSet &Bundle, + ReservationsConfig &Res, + int WhichSlot, bool Reset); + bool addResources(SlotDesc &Slot, Subunit &WhichSubunit, + ReservationsConfig &res); + void findStaticResources(SlotSet &Bundle, ResourceValues &Values); + + // Internal pool allocation functions. 
+ BundleStatus allocatePools(SlotSet &Bundle, + ReservationsConfig &Res); + bool allocateResource(PoolRequest &Item, int Id, int Count, + ReservationsConfig &Res, + ResourceValues &Values); + bool allocatePool(PoolRequestSet &Pool, ReservationsConfig &Res, + ResourceValues &Values); + bool validateBundle(std::string Cpu, SlotSet &Bundle) override; +}; + +// A simple wrapper to check the range for calculated resource latencies. +inline int getResourcePhase(PipeFunc Func, Instr *Ins) { + return std::min(Func(Ins), Ins->getCpuInfo()->getMaxResourcePhase()); +} + +///---------------------------------------------------------------------------- +/// A CPU dictionary is the top-level object in the database, and describes +/// each defined CPU in the family. Each CPU object in this table corresponds +/// to a single SchedMachineModel. +///---------------------------------------------------------------------------- +using CpuTableDict = std::map; + +class CpuTableDef { + // A dictionary of all CPUs defined in the description, indexed by name. + CpuTableDict &Cpus; + +public: + explicit CpuTableDef(CpuTableDict &Cpus) : Cpus(Cpus) {} + + CpuInfo *getCpu(std::string name) const { + if (!Cpus.count(name)) + return nullptr; + auto *cpu = Cpus[name]; + cpu->InitSubunits(); + return cpu; + } + + bool hasCpus() const { return !Cpus.empty(); } + int getCpuCount() const { return Cpus.size(); } + + // Register a set of Subtarget-specific predicates with each subtarget. + void SetInstrPredicates(std::vector *Preds) { + for (auto [Name, Cpu] : Cpus) + Cpu->SetInstrPredicates(Preds); + } +}; + +} // namespace mdl +} // namespace llvm + +///---------------------------------------------------------------------------- +/// MDLBundle.h contains template function definitions that provide the +/// implementations of all bundle packing infrastructure which are based on +/// the CpuConfig templatized object. We include it here to avoid having to +/// include it everywhere that we include MDLInfo.h. +///---------------------------------------------------------------------------- +#include "llvm/MC/MDLBundle.h" + +#endif // MDL_INFO_H diff --git a/llvm/include/llvm/MC/MDLInstrInfo.h b/llvm/include/llvm/MC/MDLInstrInfo.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/MC/MDLInstrInfo.h @@ -0,0 +1,60 @@ +//===- MDLInstrInfo.h - MDL-based instruction modeling --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a set of APIs between the MDL database and the CodeGen +// and MC libraries. The MDL database uses the Instr class to access +// information about MachineInstr and MCInst objects, and the CodeGen/MC +// libraries use these interfaces to calculate instruction latencies. +// +//===----------------------------------------------------------------------===// + +#ifndef MDL_INSTR_INFO_H +#define MDL_INSTR_INFO_H + +#include "llvm/MC/MDLInfo.h" +#include + +namespace llvm { +namespace mdl { + +/// Calculate the latency between two instructions' operands. +int calculateOperandLatency(const Instr *Def, unsigned DefOpId, + const Instr *Use, unsigned UseOpId); + +/// Wrapper for MachineInstr Objects. 
+int calculateOperandLatency(const MachineInstr *Def, unsigned DefOpId, + const MachineInstr *Use, unsigned UseOpId, + const TargetSubtargetInfo *STI); + +/// Find the maximum latency of an instruction based on operand references. +int calculateInstructionLatency(Instr *Inst); + +/// Wrapper for MCInst objects. +int calculateInstructionLatency(const MCInst *Inst, const MCSubtargetInfo *STI, + const MCInstrInfo *MCII); +/// Wrapper for MachineInstr objects. +int calculateInstructionLatency(const MachineInstr *Inst, + const TargetSubtargetInfo *STI); + +/// Calculate the latency between two instructions that hold or reserve the +/// same resource. +int calculateHazardLatency(const Instr *Reserve, const Instr *Hold); + +/// Wrapper for MCInst objects. +int calculateHazardLatency(const MCInst *Reserve, const MCInst *Hold, + const MCSubtargetInfo *STI, const MCInstrInfo *MCII); + +/// Wrapper for MachineInstr objects. +int calculateHazardLatency(const MachineInstr *Reserve, + const MachineInstr *Hold, + const TargetSubtargetInfo *STI); + +} // namespace mdl +} // namespace llvm + +#endif // MDL_INSTR_INFO_H diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp --- a/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MDLHazardRecognizer.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" @@ -117,15 +118,22 @@ VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, MachineLoopInfo &mli, AAResults *aa) - : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) { + : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa), + HazardRec(nullptr) { ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); ResourceTracker->setTrackResources(true); VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA); + + auto &STI = MF.getSubtarget(); + if (STI.getCpuInfo()) + HazardRec = new MDLHazardRecognizer(&STI, "machine-scheduler-mdl"); } VLIWPacketizerList::~VLIWPacketizerList() { delete VLIWScheduler; delete ResourceTracker; + if (HazardRec) + delete HazardRec; } // End the current packet, bundle packet instructions and reset DFA state. @@ -146,7 +154,10 @@ finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator()); } CurrentPacketMIs.clear(); - ResourceTracker->clearResources(); + if (HazardRec) + HazardRec->clearResources(); + else + ResourceTracker->clearResources(); LLVM_DEBUG(dbgs() << "End packet\n"); } @@ -199,8 +210,8 @@ // Ask DFA if machine resource is available for MI. LLVM_DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI); + bool ResourceAvail = canReserveResources(MI); - bool ResourceAvail = ResourceTracker->canReserveResources(MI); LLVM_DEBUG({ if (ResourceAvail) dbgs() << " Resources are available for adding MI to packet\n"; diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -212,6 +212,7 @@ MachineTraceMetrics::Trace BlockTrace, const MachineBasicBlock &MBB) { SmallVector InstrDepth; + // For each instruction in the new sequence compute the depth based on the // operands. Use the trace information when possible. 
For new operands which // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth @@ -445,6 +446,9 @@ MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs) { + + // TODO-MDL: Implement this for MDL model. + if (!TSchedModel.hasInstrSchedModel()) return true; @@ -551,7 +555,7 @@ // Found pattern, but did not generate alternative sequence. // This can happen e.g. when an immediate could not be materialized // in a single instruction. - if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries()) + if (InsInstrs.empty() || !TSchedModel.hasAnySchedModel()) continue; unsigned NewRootLatency, RootLatency; diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1011,10 +1011,38 @@ // Compute the number of functional unit alternatives needed // at each stage, and take the minimum value. We prioritize the // instructions by the least number of choices first. + unsigned minFuncUnits(const MachineInstr *Inst, InstrStage::FuncUnits &F) const { - unsigned SchedClass = Inst->getDesc().getSchedClass(); unsigned min = UINT_MAX; + + // Implement minFuncUnits for an Mdl Model + if (STI && STI->hasMdlModel()) { + Instr Ins(Inst, static_cast(STI)); + auto *Subunits = Ins.getSubunit(); + if (Subunits == nullptr) + return 1; + for (auto &Unit : *Subunits) { + if (auto *Refs = Unit.getUsedResourceReferences()) { + for (auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus() && Ref.getCycles() && Ref.hasResourceId()) { + F = Ref.getResourceId(); + return 1; + } + } + // TODO-MDL : we need to return some value in F. + if (auto *Prefs = Unit.getPooledResourceReferences()) { + for (auto &Ref : ReferenceIter(Prefs, &Ins)) + if (Ref.isFus()) + min = std::min(min, (unsigned)Ref.getSize()); + } + } + if (min == UINT_MAX) + return Subunits->size(); + return min; + } + + unsigned SchedClass = Inst->getDesc().getSchedClass(); if (InstrItins && !InstrItins->isEmpty()) { for (const InstrStage &IS : make_range(InstrItins->beginStage(SchedClass), @@ -1057,9 +1085,25 @@ // Compute the critical resources needed by the instruction. This // function records the functional units needed by instructions that // must use only one functional unit. We use this as a tie breaker - // for computing the resource MII. The instrutions that require + // for computing the resource MII. The instructions that require // the same, highly used, functional unit have high priority. 
void calcCriticalResources(MachineInstr &MI) { + if (STI && STI->hasMdlModel()) { + Instr Ins(&MI, static_cast(STI)); + if (auto *Subunit = Ins.getSubunit()) { + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus() && Ref.hasResourceId() && Ref.getCycles()) + Resources[Ref.getResourceId()]++; + if (auto *Prefs = (*Subunit)[0].getPooledResourceReferences()) + for (const auto &Ref : ReferenceIter(Prefs, &Ins)) + if (Ref.isFus()) + for (int Res = Ref.getFirst(); Res <= Ref.getLast(); Res++) + Resources[Ref.getResourceIds()[Res]]++; + } + return; + } + unsigned SchedClass = MI.getDesc().getSchedClass(); if (InstrItins && !InstrItins->isEmpty()) { for (const InstrStage &IS : @@ -3087,6 +3131,7 @@ }); } +// TODO-MDL - Write MDL version of this void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc, int Cycle) { assert(!UseDFA); @@ -3099,6 +3144,7 @@ ++NumScheduledMops[positiveModulo(C, InitiationInterval)]; } +// TODO-MDL - Write MDL version of this void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle) { assert(!UseDFA); @@ -3111,6 +3157,7 @@ --NumScheduledMops[positiveModulo(C, InitiationInterval)]; } +// TODO-MDL - Write MDL version of this bool ResourceManager::isOverbooked() const { assert(!UseDFA); for (int Slot = 0; Slot < InitiationInterval; ++Slot) { @@ -3125,6 +3172,7 @@ return false; } +// TODO-MDL - Write MDL version of this int ResourceManager::calculateResMIIDFA() const { assert(UseDFA); @@ -3188,6 +3236,7 @@ return Resmii; } +// TODO-MDL - implement MDL version of this. int ResourceManager::calculateResMII() const { if (UseDFA) return calculateResMIIDFA(); diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -70,6 +70,7 @@ #include using namespace llvm; +using namespace mdl; #define DEBUG_TYPE "machine-scheduler" @@ -2227,21 +2228,49 @@ void SchedRemainder:: init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { reset(); - if (!SchedModel->hasInstrSchedModel()) - return; RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); - for (SUnit &SU : DAG->SUnits) { - const MCSchedClassDesc *SC = DAG->getSchedClass(&SU); - RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC) - * SchedModel->getMicroOpFactor(); - for (TargetSchedModel::ProcResIter - PI = SchedModel->getWriteProcResBegin(SC), - PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - unsigned PIdx = PI->ProcResourceIdx; - unsigned Factor = SchedModel->getResourceFactor(PIdx); - assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle); - RemainingCounts[PIdx] += - (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle)); + + // The MDL version of this is essentially the same as the InstrSchedModel + // version, except that we use precomputed resource factors based on + // pool size rather than resource id. 
+ if (auto *Cpu = SchedModel->getCpuInfo()) { + for (SUnit &SU : DAG->SUnits) { + mdl::Instr Ins(SU.getInstr(), SchedModel->getSubtargetInfo()); + RemIssueCount += Cpu->numMicroOps(Ins) * SchedModel->getMicroOpFactor(); + if (auto *Subunit = Ins.getSubunit()) { + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus() && Ref.hasResourceId() && Ref.getCycles()) + RemainingCounts[Ref.getResourceId()] += + Ref.getCycles() * SchedModel->getResourceFactor(1); + if (auto *Prefs = (*Subunit)[0].getPooledResourceReferences()) + for (const auto &Ref : ReferenceIter(Prefs, &Ins)) + if (Ref.isFus()) { + int Factor = SchedModel->getResourceFactor(Ref.getSize()); + int Cycles = Ref.getCycles() * Factor; + for (int res = Ref.getFirst(); res <= Ref.getLast(); res++) + RemainingCounts[Ref.getResourceIds()[res]] += Cycles; + } + } + } + return; + } + + if (SchedModel->hasInstrSchedModel()) { + for (SUnit &SU : DAG->SUnits) { + const MCSchedClassDesc *SC = DAG->getSchedClass(&SU); + RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC) * + SchedModel->getMicroOpFactor(); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); + PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + unsigned Factor = SchedModel->getResourceFactor(PIdx); + assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle); + RemainingCounts[PIdx] += + (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle)); + } } } } @@ -2252,6 +2281,17 @@ DAG = dag; SchedModel = smodel; Rem = rem; + // For an MDL-based model, pools and groups are handled in the MDL compiler, + // so we don't need to deal with them here. + if (SchedModel->hasMdlModel()) { + unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); + ReservedCycles.resize(ResourceCount, InvalidCycle); + ReservedCyclesIndex.resize(ResourceCount); + ExecutedResCounts.resize(ResourceCount); + for (unsigned Idx = 0; Idx < ResourceCount; ++Idx) + ReservedCyclesIndex[Idx] = Idx; + return; + } if (SchedModel->hasInstrSchedModel()) { unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); ReservedCyclesIndex.resize(ResourceCount); @@ -2330,11 +2370,15 @@ unsigned MinNextUnreserved = InvalidCycle; unsigned InstanceIdx = 0; unsigned StartIndex = ReservedCyclesIndex[PIdx]; - unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits; - assert(NumberOfInstances > 0 && - "Cannot have zero instances of a ProcResource"); + unsigned NumberOfInstances = 1; + if (!SchedModel->hasMdlModel()) { + NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits; + assert(NumberOfInstances > 0 && + "Cannot have zero instances of a ProcResource"); + } - if (isUnbufferedGroup(PIdx)) { + // Mdl-based models don't need to deal with explicitly-defined groups. 
+ if (!SchedModel->hasMdlModel() && isUnbufferedGroup(PIdx)) { // If any subunits are used by the instruction, report that the // subunits of the resource group are available at the first cycle // in which the unit is available, effectively removing the group @@ -2421,7 +2465,63 @@ return true; } - if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { + if (!SU->hasReservedResource) + return false; + + if (SchedModel->hasMdlModel()) { + mdl::Instr Ins(SU->getInstr(), SchedModel->getSubtargetInfo()); + if (auto *Subunit = Ins.getSubunit()) { + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) { + if (Ref.hasResourceId()) { + unsigned ResIdx = Ref.getResourceId(); + auto [NRCycle, InstanceIdx] = getNextResourceCycle( + nullptr, ResIdx, Ref.getCycles(), Ref.getPhase(&Ins)); + if (NRCycle > CurrCycle) { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + MaxObservedStall = + std::max((unsigned)Ref.getCycles(), MaxObservedStall); +#endif + LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " + << SchedModel->getResourceName(ResIdx) << '[' + << InstanceIdx - ReservedCyclesIndex[ResIdx] + << ']' << "=" << NRCycle << "c\n"); + return true; + } + } + } + if (auto *Prefs = (*Subunit)[0].getPooledResourceReferences()) + for (const auto &Ref : ReferenceIter(Prefs, &Ins)) { + if (Ref.isFus() && Ref.getCycles()) { + double Cycles = Ref.getCycles(); + unsigned MinRes = 0, InstanceIdx, NRCycle = InvalidCycle; + for (int Res = Ref.getFirst(); Res <= Ref.getLast(); Res++) { + auto [NextUnreserved, NextInstanceIdx] = + getNextResourceCycle(nullptr, Ref.getResourceIds()[Res], + Cycles, Ref.getPhase(&Ins)); + if (NRCycle > NextUnreserved) { + InstanceIdx = NextInstanceIdx; + NRCycle = NextUnreserved; + MinRes = Res; + } + } + if (NRCycle != InvalidCycle && NRCycle > CurrCycle) { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + MaxObservedStall = std::max(Ref.getCycles(), MaxObservedStall); +#endif + LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " + << SchedModel->getResourceName(MinRes) << '[' + << InstanceIdx - ReservedCyclesIndex[MinRes] + << ']' << "=" << NRCycle << "c\n"); + return true; + } + } + } + } + return false; + } + + if (SchedModel->hasInstrSchedModel()) { const MCSchedClassDesc *SC = DAG->getSchedClass(SU); for (const MCWriteProcResEntry &PE : make_range(SchedModel->getWriteProcResBegin(SC), @@ -2472,7 +2572,7 @@ unsigned SchedBoundary:: getOtherResourceCount(unsigned &OtherCritIdx) { OtherCritIdx = 0; - if (!SchedModel->hasInstrSchedModel()) + if (!SchedModel->hasInstrSchedModel() && !SchedModel->hasMdlModel()) return 0; unsigned OtherCritCount = Rem->RemIssueCount @@ -2487,6 +2587,7 @@ OtherCritIdx = PIdx; } } + // TODO-MDL - Need an MDL-specific version of this if (OtherCritIdx) { LLVM_DEBUG( dbgs() << " " << Available.getName() << " + Remain CritRes: " @@ -2585,10 +2686,8 @@ /// \return the next cycle at which the instruction may execute without /// oversubscribing resources. unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx, - unsigned ReleaseAtCycle, - unsigned NextCycle, + unsigned ReleaseAtCycle, unsigned Factor, unsigned AcquireAtCycle) { - unsigned Factor = SchedModel->getResourceFactor(PIdx); unsigned Count = Factor * (ReleaseAtCycle- AcquireAtCycle); LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +" << ReleaseAtCycle << "x" << Factor << "u\n"); @@ -2635,7 +2734,6 @@ } // checkHazard should prevent scheduling multiple instructions per cycle that // exceed the issue width. 
- const MCSchedClassDesc *SC = DAG->getSchedClass(SU); unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); assert( (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && @@ -2666,8 +2764,8 @@ } RetiredMOps += IncMOps; - // Update resource counts and critical resource. - if (SchedModel->hasInstrSchedModel()) { + // Update critical resource + if (SchedModel->hasInstrSchedModel() || SchedModel->hasMdlModel()) { unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); Rem->RemIssueCount -= DecRemIssue; @@ -2686,11 +2784,83 @@ << "c\n"); } } + } + + // Update resource counts for MdlModel. + if (SchedModel->hasMdlModel()) { + Instr Ins(SU->getInstr(), SchedModel->getSubtargetInfo()); + if (auto *Subunit = Ins.getSubunit()) { + auto *Refs = (*Subunit)[0].getUsedResourceReferences(); + if (Refs) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus() && Ref.hasResourceId() && Ref.getCycles()) { + unsigned Factor = SchedModel->getResourceFactor(1); + unsigned Pidx = Ref.getResourceId(); + NextCycle = std::max(NextCycle, + countResource(nullptr, Pidx, Ref.getCycles(), + Factor, Ref.getPhase(&Ins))); + } + auto *Prefs = (*Subunit)[0].getPooledResourceReferences(); + if (Prefs) + for (const auto &Ref : ReferenceIter(Prefs, &Ins)) { + if (Ref.isFus()) { + unsigned Factor = SchedModel->getResourceFactor(Ref.getSize()); + unsigned Cycles = Ref.getCycles(); + for (int Res = Ref.getFirst(); Res <= Ref.getLast(); Res++) { + unsigned ResId = Ref.getResourceIds()[Res]; + NextCycle = std::max(NextCycle, + countResource(nullptr, ResId, Cycles, Factor, + Ref.getPhase(&Ins))); + } + } + } + // For reserved resources, record the highest cycle using the resource. + if (SU->hasReservedResource) { + if (Refs) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus() && Ref.hasResourceId() && Ref.getCycles()) { + if (!Ref.isBuffered()) { + auto [ReservedUntil, InstanceIdx] = + getNextResourceCycle(nullptr, Ref.getResourceId(), + Ref.getCycles(), Ref.getPhase(&Ins)); + if (isTop()) + ReservedCycles[InstanceIdx] = + std::max(ReservedUntil, NextCycle + Ref.getCycles()); + else + ReservedCycles[InstanceIdx] = NextCycle; + } + } + if (Prefs) + for (const auto &Ref : + ReferenceIter(Prefs, &Ins)) { + if (Ref.isFus()) { + unsigned Cycles = Ref.getCycles(); + for (int Res = Ref.getFirst(); Res <= Ref.getLast(); Res++) { + unsigned ResId = Ref.getResourceIds()[Res]; + if (!Ref.isBuffered()) { + auto [ReservedUntil, InstanceIdx] = getNextResourceCycle( + nullptr, ResId, Ref.getCycles(), Ref.getPhase(&Ins)); + if (isTop()) + ReservedCycles[InstanceIdx] = + std::max(ReservedUntil, NextCycle + Cycles); + else + ReservedCycles[InstanceIdx] = NextCycle; + } + } + } + } + } + } + } + // Update resource counts for InstrSchedModel. 
+ else if (SchedModel->hasInstrSchedModel()) { + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned Factor = SchedModel->getResourceFactor(PI->ProcResourceIdx); unsigned RCycle = - countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle, + countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, Factor, PI->AcquireAtCycle); if (RCycle > NextCycle) NextCycle = RCycle; @@ -2921,6 +3091,35 @@ if (!Policy.ReduceResIdx && !Policy.DemandResIdx) return; + if (SchedModel->hasMdlModel()) { + Instr Ins(SU->getInstr(), SchedModel->getSubtargetInfo()); + if (auto *Subunit = Ins.getSubunit()) { + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) { + if (Ref.isFus() && Ref.hasResourceId() && Ref.getCycles()) { + unsigned ResId = Ref.getResourceId(); + if (ResId == Policy.ReduceResIdx) + ResDelta.CritResources += Ref.getCycles(); + if (ResId == Policy.DemandResIdx) + ResDelta.DemandedResources += Ref.getCycles(); + } + } + if (auto *Prefs = (*Subunit)[0].getPooledResourceReferences()) + for (const auto &Ref : ReferenceIter(Prefs, &Ins)) { + if (Ref.isFus() && Ref.getCycles()) { + for (int Res = Ref.getFirst(); Res <= Ref.getLast(); Res++) { + unsigned ResId = Ref.getResourceIds()[Res]; + if (ResId == Policy.ReduceResIdx) + ResDelta.CritResources += Ref.getCycles(); + if (ResId == Policy.DemandResIdx) + ResDelta.DemandedResources += Ref.getCycles(); + } + } + } + } + return; + } + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -37,6 +37,7 @@ #include using namespace llvm; +using namespace mdl; #define DEBUG_TYPE "machine-trace-metrics" @@ -115,27 +116,56 @@ if (MI.isCall()) FBI->HasCalls = true; - // Count processor resources used. - if (!SchedModel.hasInstrSchedModel()) - continue; - const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI); - if (!SC->isValid()) + // Count processor resources used for MDL-based model. + if (SchedModel.hasMdlModel()) { + Instr Ins(&MI, SchedModel.getSubtargetInfo()); + if (auto *Subunit = Ins.getSubunit()) { + int ResFactor = SchedModel.getResourceFactor(1); + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus() && Ref.hasResourceId() && Ref.getCycles()) + PRCycles[Ref.getResourceId()] += Ref.getCycles() * ResFactor; + + if (auto *Prefs = (*Subunit)[0].getPooledResourceReferences()) + for (const auto &Ref : ReferenceIter(Prefs, &Ins)) + if (Ref.isFus()) { + int Factor = SchedModel.getResourceFactor(Ref.getSize()); + int Cycles = Ref.getCycles() * Factor; + for (int res = Ref.getFirst(); res <= Ref.getLast(); res++) + PRCycles[Ref.getResourceIds()[res]] += Cycles; + } + } continue; + } - for (TargetSchedModel::ProcResIter - PI = SchedModel.getWriteProcResBegin(SC), - PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { - assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind"); - PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle; + // Count processor resources used for SchedModel-based model. 
+ if (SchedModel.hasInstrSchedModel()) { + const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI); + if (!SC->isValid()) + continue; + + for (TargetSchedModel::ProcResIter + PI = SchedModel.getWriteProcResBegin(SC), + PE = SchedModel.getWriteProcResEnd(SC); + PI != PE; ++PI) { + assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind"); + PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle; + } } } FBI->InstrCount = InstrCount; - - // Scale the resource cycles so they are comparable. unsigned PROffset = MBB->getNumber() * PRKinds; - for (unsigned K = 0; K != PRKinds; ++K) - ProcReleaseAtCycles[PROffset + K] = - PRCycles[K] * SchedModel.getResourceFactor(K); + + // Save release cycles for each resource. + if (SchedModel.hasMdlModel()) { + for (unsigned K = 0; K != PRKinds; ++K) + ProcReleaseAtCycles[PROffset + K] = PRCycles[K]; + } else { + // Scale the resource cycles so they are comparable. + for (unsigned K = 0; K != PRKinds; ++K) + ProcReleaseAtCycles[PROffset + K] = + PRCycles[K] * SchedModel.getResourceFactor(K); + } return FBI; } @@ -877,6 +907,7 @@ TBI.CriticalPath = 0; // Print out resource depths here as well. + // TODO-MDL - Need an MDL-specific version of this LLVM_DEBUG({ dbgs() << format("%7u Instructions\n", TBI.InstrDepth); ArrayRef PRDepths = getProcResourceDepths(MBB->getNumber()); @@ -1057,6 +1088,7 @@ TBI.HasValidInstrHeights = true; TBI.CriticalPath = 0; + // TODO-MDL - Need an MDL-specific version of this LLVM_DEBUG({ dbgs() << format("%7u Instructions\n", TBI.InstrHeight); ArrayRef PRHeights = getProcResourceHeights(MBB->getNumber()); @@ -1225,6 +1257,7 @@ return std::max(Instrs, PRMax); } +// TODO-MDL - Need an MDL-specific version of this unsigned MachineTraceMetrics::Trace::getResourceLength( ArrayRef Extrablocks, ArrayRef ExtraInstrs, diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/Value.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MDLInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -58,6 +59,7 @@ #include using namespace llvm; +using namespace mdl; #define DEBUG_TYPE "machine-scheduler" @@ -599,6 +601,32 @@ // Unbuffered resources prevent execution of subsequent instructions that // require the same resources. This is used for in-order execution pipelines // within an out-of-order core. These are identified by BufferSize=1. 
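+ // For MDL-based models, derive these flags from the subunit's functional-unit references: an unreserved reference marks the SU as unbuffered, and a non-buffered reference marks it as using a reserved resource.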
+ if (SchedModel.hasMdlModel()) { + Instr Ins(SU->getInstr(), SchedModel.getSubtargetInfo()); + if (auto *Subunit = Ins.getSubunit()) { + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) { + if (Ref.isFus() && Ref.getCycles()) { + if (Ref.isUnreserved()) + SU->isUnbuffered = true; + if (!Ref.isBuffered()) + SU->hasReservedResource = true; + } + } + if (auto *Prefs = (*Subunit)[0].getPooledResourceReferences()) + for (const auto &Ref : + ReferenceIter(Prefs, &Ins)) { + if (Ref.isFus()) { + if (Ref.isUnreserved()) + SU->isUnbuffered = true; + if (!Ref.isBuffered()) + SU->hasReservedResource = true; + } + } + } + continue; + } + if (SchedModel.hasInstrSchedModel()) { const MCSchedClassDesc *SC = getSchedClass(SU); for (const MCWriteProcResEntry &PRE : diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp --- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -40,12 +41,15 @@ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::init(5), cl::desc("Track reg pressure and switch priority to in-depth")); -ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) - : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) { +ResourcePriorityQueue::ResourcePriorityQueue( + SelectionDAGISel *IS, ScheduleHazardRecognizer *HazardRec) + : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()), + HazardRec(HazardRec) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); TRI = STI.getRegisterInfo(); TLI = IS->TLI; TII = STI.getInstrInfo(); + Cpu = STI.getCpuInfo(); ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); // This hard requirement could be relaxed, but for now // do not let it proceed. @@ -233,6 +237,15 @@ Queue.push_back(SU); } +void ResourcePriorityQueue::reset() { + if (Cpu) { + HazardRec->Reset(); + } else { + ResourcesModel->clearResources(); + Packet.clear(); + } +} + /// Check if scheduling of this SU is possible /// in the current packet. bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { @@ -249,6 +262,11 @@ if (SU->getNode()->isMachineOpcode()) switch (SU->getNode()->getMachineOpcode()) { default: + if (Cpu) { + if (!HazardRec->canReserveResources(*SU->getInstr())) + return false; + break; + } if (!ResourcesModel->canReserveResources(&TII->get( SU->getNode()->getMachineOpcode()))) return false; @@ -279,14 +297,17 @@ /// Keep track of available resources. void ResourcePriorityQueue::reserveResources(SUnit *SU) { - // If this SU does not fit in the packet - // start a new one. - if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) { - ResourcesModel->clearResources(); - Packet.clear(); - } + // If this SU does not fit in the packet start a new one. 
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) + reset(); if (SU->getNode() && SU->getNode()->isMachineOpcode()) { + if (Cpu) { + HazardRec->reserveResources(*SU->getInstr()); + if (HazardRec->IssueSize() >= Cpu->getMaxIssue()) + reset(); + return; + } switch (SU->getNode()->getMachineOpcode()) { default: ResourcesModel->reserveResources(&TII->get( @@ -302,17 +323,13 @@ Packet.push_back(SU); } // Forcefully end packet for PseudoOps. - else { - ResourcesModel->clearResources(); - Packet.clear(); - } + else + reset(); // If packet is now full, reset the state so in the next cycle // we start fresh. - if (Packet.size() >= InstrItins->SchedModel.IssueWidth) { - ResourcesModel->clearResources(); - Packet.clear(); - } + if (Packet.size() >= InstrItins->SchedModel.IssueWidth) + reset(); } int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -63,11 +63,11 @@ AAResults *AA; public: - ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa, - SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { - const TargetSubtargetInfo &STI = mf.getSubtarget(); + ScheduleDAGVLIW(SelectionDAGISel *IS) + : ScheduleDAGSDNodes(*IS->MF), AA(IS->AA) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); + AvailableQueue = new ResourcePriorityQueue(IS, HazardRec); } ~ScheduleDAGVLIW() override { @@ -267,5 +267,5 @@ /// createVLIWDAGScheduler - This creates a top-down list scheduler. ScheduleDAGSDNodes * llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { - return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS)); + return new ScheduleDAGVLIW(IS); } diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/MDLHazardRecognizer.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -1259,6 +1260,9 @@ // Default implementation of CreateTargetMIHazardRecognizer. 
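+// When the subtarget provides an MDL CPU model (getCpuInfo() is non-null), these default hooks return an MDL-based hazard recognizer instead of the scoreboard recognizer.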
ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetMIHazardRecognizer( const InstrItineraryData *II, const ScheduleDAGMI *DAG) const { + auto &STI = DAG->MF.getSubtarget(); + if (STI.getCpuInfo()) + return new MDLHazardRecognizer(&STI, "machine-scheduler-mdl"); return new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler"); } @@ -1266,6 +1270,9 @@ ScheduleHazardRecognizer *TargetInstrInfo:: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const { + auto &STI = DAG->MF.getSubtarget(); + if (STI.getCpuInfo()) + return new MDLHazardRecognizer(&STI, "post-RA-sched-mdl"); return new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } diff --git a/llvm/lib/CodeGen/TargetSchedule.cpp b/llvm/lib/CodeGen/TargetSchedule.cpp --- a/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/llvm/lib/CodeGen/TargetSchedule.cpp @@ -20,8 +20,11 @@ #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" +#include "llvm/MC/MDLInfo.h" +#include "llvm/MC/MDLInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -29,6 +32,7 @@ #include using namespace llvm; +using namespace mdl; static cl::opt EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), cl::desc("Use TargetSchedModel for latency lookup")); @@ -48,14 +52,33 @@ return EnableSchedItins && !InstrItins.isEmpty(); } +bool TargetSchedModel::hasMdlModel() const { return STI->hasMdlModel(); } +mdl::CpuInfo *TargetSchedModel::getCpuInfo() const { return STI->getCpuInfo(); } + void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) { STI = TSInfo; SchedModel = TSInfo->getSchedModel(); TII = TSInfo->getInstrInfo(); STI->initInstrItins(InstrItins); + // ResourceLCM is computed in the MDL compiler for each CPU. + if (hasMdlModel()) { + ResourceLCM = STI->getCpuInfo()->getResourceFactor(); + MicroOpFactor = ResourceLCM / STI->getCpuInfo()->getMaxIssue(); + + // MDL functional unit resources don't have "NumUnits", instead we use + // explicit "pools" to allocate them. So here we pre-compute all + // feasible pool size factors into ResourceFactors, then index it with + // the pool size. + ResourceFactors.resize(ResourceLCM + 1); + for (unsigned PoolSize = 1; PoolSize <= ResourceLCM; PoolSize++) + ResourceFactors[PoolSize] = ResourceLCM / PoolSize; + return; + } + unsigned NumRes = SchedModel.getNumProcResourceKinds(); ResourceFactors.resize(NumRes); + ResourceLCM = SchedModel.IssueWidth; for (unsigned Idx = 0; Idx < NumRes; ++Idx) { unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; @@ -69,9 +92,11 @@ } } -/// Returns true only if instruction is specified as single issue. +/// Returns true only if instruction is specified as beginning an issue group. bool TargetSchedModel::mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC) const { + if (hasMdlModel()) + return STI->getCpuInfo()->mustBeginGroup(MI, STI); if (hasInstrSchedModel()) { if (!SC) SC = resolveSchedClass(MI); @@ -81,8 +106,11 @@ return false; } +/// Returns true only if instruction is specified as ending an issue group. 
bool TargetSchedModel::mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC) const { + if (hasMdlModel()) + return STI->getCpuInfo()->mustEndGroup(MI, STI); if (hasInstrSchedModel()) { if (!SC) SC = resolveSchedClass(MI); @@ -94,6 +122,9 @@ unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC) const { + if (hasMdlModel()) + return STI->getCpuInfo()->numMicroOps(MI, STI); + if (hasInstrItineraries()) { int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass()); return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, *MI); @@ -168,14 +199,60 @@ return UseIdx; } +static unsigned compareLatencies(const TargetInstrInfo *TII, + const MachineInstr *Def, int DefOpIdx, + const MachineInstr *Use, int UseOpIdx, + int MdlLatency, unsigned BaseLatency, + int Advance) { + // Adjust LLVM Schedule-based latency by the Advance amount. + unsigned Latency = BaseLatency - Advance; + if (Advance > 0 && (unsigned)Advance > BaseLatency) // unsigned wrap + Latency = 0; + + if (MdlLatency == -1 || MdlLatency == (int)Latency) + return Latency; + +#if 0 // Enable this to debug latency calculation + if (Use == nullptr || UseOpIdx == -1) { + llvm::dbgs() << formatv("[{0}<{1}> nullptr <-1> ] ", + Def->getOpcode(), DefOpIdx); + } else { + llvm::dbgs() << formatv("[{0}<{1}> {2}<{3}> ] ", + Def->getOpcode(), DefOpIdx, Use->getOpcode(), UseOpIdx); + } + + llvm::dbgs() << formatv("{0}-->", TII->getName(Def->getOpcode())); + + if (Use == nullptr || UseOpIdx == -1) + llvm::dbgs() << " == "; + else + llvm::dbgs() << formatv("{0} == ", TII->getName(Use->getOpcode())); + + llvm::dbgs() << formatv("Expected: {0}({1}) Calculated: {2}\n", + Latency, Advance, MdlLatency); + // llvm_unreachable("MDLCheckLatency :: Different calculated latencies"); +#endif + + return MdlLatency; +} + +#include + // Top-level API for clients that know the operand indices. unsigned TargetSchedModel::computeOperandLatency( const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const { - if (!hasInstrSchedModel() && !hasInstrItineraries()) + if (!hasAnySchedModel()) return TII->defaultDefLatency(SchedModel, *DefMI); + // If we have an MDL model, use it to determine the latency. + int MdlLatency = -1; + if (hasMdlModel()) + return llvm::mdl::calculateOperandLatency(DefMI, DefOperIdx, UseMI, + UseOperIdx, STI); + + // Use itineraries to calculate the latency. if (hasInstrItineraries()) { int OperLatency = 0; if (UseMI) { @@ -187,7 +264,8 @@ OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx); } if (OperLatency >= 0) - return OperLatency; + return compareLatencies(TII, DefMI, DefOperIdx, UseMI, UseOperIdx, + MdlLatency, OperLatency, 0); // No operand latency was found. unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI); @@ -199,7 +277,8 @@ // special cases without TII hooks. InstrLatency = std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI)); - return InstrLatency; + return compareLatencies(TII, DefMI, DefOperIdx, UseMI, UseOperIdx, + MdlLatency, InstrLatency, 0); } // hasInstrSchedModel() const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); @@ -211,17 +290,18 @@ unsigned WriteID = WLEntry->WriteResourceID; unsigned Latency = capLatency(WLEntry->Cycles); if (!UseMI) - return Latency; + return compareLatencies(TII, DefMI, DefOperIdx, UseMI, UseOperIdx, + MdlLatency, Latency, 0); // Lookup the use's latency adjustment in SubtargetInfo. 
const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI); if (UseDesc->NumReadAdvanceEntries == 0) - return Latency; + return compareLatencies(TII, DefMI, DefOperIdx, UseMI, UseOperIdx, + MdlLatency, Latency, 0); unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); - if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap - return 0; - return Latency - Advance; + return compareLatencies(TII, DefMI, DefOperIdx, UseMI, UseOperIdx, + MdlLatency, Latency, Advance); } // If DefIdx does not exist in the model (e.g. implicit defs), then return // unit latency (defaultDefLatency may be too conservative). @@ -237,7 +317,10 @@ // FIXME: Automatically giving all implicit defs defaultDefLatency is // undesirable. We should only do it for defs that are known to the MC // desc like flags. Truly implicit defs should get 1 cycle latency. - return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI); + unsigned Latency = + DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI); + return compareLatencies(TII, DefMI, DefOperIdx, UseMI, UseOperIdx, MdlLatency, + Latency, 0); } unsigned @@ -252,6 +335,10 @@ } unsigned TargetSchedModel::computeInstrLatency(const MCInst &Inst) const { + if (hasMdlModel()) { + return llvm::mdl::calculateInstructionLatency(&Inst, STI, TII); + } + if (hasInstrSchedModel()) return capLatency(SchedModel.computeInstrLatency(*STI, *TII, Inst)); return computeInstrLatency(Inst.getOpcode()); @@ -260,6 +347,12 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI, bool UseDefaultDefLatency) const { + + // If we have an MDL model, use it to determine the latency. + if (hasMdlModel()) { + return llvm::mdl::calculateInstructionLatency(MI, STI); + } + // For the itinerary model, fall back to the old subtarget hook. // Allow subtargets to compute Bundle latencies outside the machine model. if (hasInstrItineraries() || MI->isBundle() || @@ -296,6 +389,22 @@ // If we have a per operand scheduling model, check if this def is writing // an unbuffered resource. If so, it treated like an in-order cpu. 
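+ // With an MDL model, return 1 if any functional-unit reference in the instruction's subunit is buffered, and 0 otherwise.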
+ if (hasMdlModel()) { + Instr Ins(DefMI, getSubtargetInfo()); + if (auto *Subunit = Ins.getSubunit()) { + if (auto *Refs = (*Subunit)[0].getUsedResourceReferences()) + for (const auto &Ref : ReferenceIter(Refs, &Ins)) + if (Ref.isFus() && Ref.getCycles() && Ref.isBuffered()) + return 1; + + if (auto *Prefs = (*Subunit)[0].getPooledResourceReferences()) + for (const auto &Ref : ReferenceIter(Prefs, &Ins)) + if (Ref.isFus() && Ref.isBuffered()) + return 1; + } + return 0; + } + if (hasInstrSchedModel()) { const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); if (SCDesc->isValid()) { @@ -311,12 +420,14 @@ double TargetSchedModel::computeReciprocalThroughput(const MachineInstr *MI) const { + if (hasMdlModel()) + return STI->getCpuInfo()->getReciprocalThroughput(STI, MI); + if (hasInstrItineraries()) { unsigned SchedClass = MI->getDesc().getSchedClass(); return MCSchedModel::getReciprocalThroughput(SchedClass, *getInstrItineraries()); } - if (hasInstrSchedModel()) return MCSchedModel::getReciprocalThroughput(*STI, *resolveSchedClass(MI)); @@ -340,6 +451,8 @@ double TargetSchedModel::computeReciprocalThroughput(const MCInst &MI) const { + if (hasMdlModel()) + return STI->getCpuInfo()->getReciprocalThroughput(STI, TII, &MI); if (hasInstrSchedModel()) return SchedModel.getReciprocalThroughput(*STI, *TII, MI); return computeReciprocalThroughput(MI.getOpcode()); diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp --- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MDLInfo.h" using namespace llvm; @@ -19,8 +20,9 @@ ArrayRef PF, ArrayRef PD, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, - const unsigned *FP) - : MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {} + const unsigned *FP, const mdl::CpuTableDef *MDL) + : MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP, + MDL) {} TargetSubtargetInfo::~TargetSubtargetInfo() = default; diff --git a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp --- a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp +++ b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MDLInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -63,8 +64,11 @@ cl::desc("High register pressure threhold.")); VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI, - const TargetSchedModel *SM) - : TII(STI.getInstrInfo()), SchedModel(SM) { + const TargetSchedModel *SM, + ScheduleHazardRecognizer *HazardRec) + : TII(STI.getInstrInfo()), HazardRec(HazardRec), SchedModel(SM), + Cpu(STI.getCpuInfo()) { + ResourcesModel = createPacketizer(STI); // This hard requirement could be relaxed, @@ -113,6 +117,12 @@ // in the current cycle. 
switch (SU->getInstr()->getOpcode()) { default: + // Use MDL to see if the instruction can be issued in this packet + if (Cpu) { + if (!HazardRec->canReserveResources(*SU->getInstr())) + return false; + break; + } if (!ResourcesModel->canReserveResources(*SU->getInstr())) return false; break; @@ -161,7 +171,13 @@ switch (SU->getInstr()->getOpcode()) { default: - ResourcesModel->reserveResources(*SU->getInstr()); + // use MDL method to reserve resources + if (Cpu) { + HazardRec->reserveResources(*SU->getInstr()); + } else { + ResourcesModel->reserveResources(*SU->getInstr()); + Packet.push_back(SU); + } break; case TargetOpcode::EXTRACT_SUBREG: case TargetOpcode::INSERT_SUBREG: @@ -174,9 +190,12 @@ case TargetOpcode::COPY: case TargetOpcode::INLINEASM: case TargetOpcode::INLINEASM_BR: + if (!Cpu) + Packet.push_back(SU); + else + HazardRec->reserveResources(*SU->getInstr()); break; } - Packet.push_back(SU); #ifndef NDEBUG LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); @@ -286,8 +305,10 @@ delete Top.ResourceModel; delete Bot.ResourceModel; - Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel()); - Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel()); + Top.ResourceModel = + createVLIWResourceModel(STI, DAG->getSchedModel(), Top.HazardRec); + Bot.ResourceModel = + createVLIWResourceModel(STI, DAG->getSchedModel(), Bot.HazardRec); const std::vector &MaxPressure = DAG->getRegPressure().MaxSetPressure; @@ -303,8 +324,9 @@ } VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel( - const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const { - return new VLIWResourceModel(STI, SchedModel); + const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel, + ScheduleHazardRecognizer *HazardRec) const { + return new VLIWResourceModel(STI, SchedModel, HazardRec); } void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt --- a/llvm/lib/MC/CMakeLists.txt +++ b/llvm/lib/MC/CMakeLists.txt @@ -62,6 +62,7 @@ MCWinEH.cpp MCXCOFFObjectTargetWriter.cpp MCXCOFFStreamer.cpp + MDLInstrInfo.cpp MachObjectWriter.cpp SPIRVObjectWriter.cpp StringTableBuilder.cpp diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp --- a/llvm/lib/MC/MCSchedule.cpp +++ b/llvm/lib/MC/MCSchedule.cpp @@ -15,6 +15,7 @@ #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MDLInstrInfo.h" #include #include @@ -69,6 +70,10 @@ int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI, const MCInstrInfo &MCII, const MCInst &Inst) const { + // If we have MDL information, use it to compute the latency. 
+ if (STI.getCpuInfo() != nullptr) { + return mdl::calculateInstructionLatency(&Inst, &STI, &MCII); + } unsigned SchedClass = MCII.get(Inst.getOpcode()).getSchedClass(); const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClass); if (!SCDesc->isValid()) diff --git a/llvm/lib/MC/MCSubtargetInfo.cpp b/llvm/lib/MC/MCSubtargetInfo.cpp --- a/llvm/lib/MC/MCSubtargetInfo.cpp +++ b/llvm/lib/MC/MCSubtargetInfo.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/SubtargetFeature.h" @@ -21,6 +22,9 @@ using namespace llvm; +static cl::opt EnableSchedMdl("schedmdl", cl::Hidden, cl::init(true), + cl::desc("Use MDL for scheduling")); + /// Find KV in array using binary search. template static const T *Find(StringRef S, ArrayRef A) { @@ -215,6 +219,10 @@ CPUSchedModel = &getSchedModelForCPU(TuneCPU); else CPUSchedModel = &MCSchedModel::GetDefaultSchedModel(); + + // Initialize MDL database, look up the Cpu name in the table. + if (EnableSchedMdl && !TuneCPU.empty() && CpuTable != nullptr) + CpuModel = CpuTable->getCpu(TuneCPU.str()); } void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef TuneCPU, @@ -223,18 +231,16 @@ FeatureString = std::string(FS); } -MCSubtargetInfo::MCSubtargetInfo(const Triple &TT, StringRef C, StringRef TC, - StringRef FS, ArrayRef PF, - ArrayRef PD, - const MCWriteProcResEntry *WPR, - const MCWriteLatencyEntry *WL, - const MCReadAdvanceEntry *RA, - const InstrStage *IS, const unsigned *OC, - const unsigned *FP) +MCSubtargetInfo::MCSubtargetInfo( + const Triple &TT, StringRef C, StringRef TC, StringRef FS, + ArrayRef PF, ArrayRef PD, + const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, + const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, + const unsigned *FP, const mdl::CpuTableDef *MDL) : TargetTriple(TT), CPU(std::string(C)), TuneCPU(std::string(TC)), ProcFeatures(PF), ProcDesc(PD), WriteProcResTable(WPR), WriteLatencyTable(WL), ReadAdvanceTable(RA), Stages(IS), - OperandCycles(OC), ForwardingPaths(FP) { + OperandCycles(OC), ForwardingPaths(FP), CpuTable(MDL) { InitMCProcessorInfo(CPU, TuneCPU, FS); } diff --git a/llvm/lib/MC/MDLInstrInfo.cpp b/llvm/lib/MC/MDLInstrInfo.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/MC/MDLInstrInfo.cpp @@ -0,0 +1,497 @@ +//===- MDLInstrInfo.cpp - MDL-based instruction modeling +//--------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a set of APIs between the MDL database and the CodeGen +// and MC libraries. The MDL database uses the Instr class to access +// information about MachineInstr and MCInst objects, and the CodeGen/MC +// libraries use these interfaces to calculate various types of instruction +// latencies and resource management. +// +// While this is part of the MC library, it uses interfaces into the Codegen +// headers so that one interface can provide MDL information for both MCInst +// and MachineInstr objects. Note that we don't include code or data from +// the Codegen library. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "llvm/ADT/bit.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MDLInfo.h" +#include "llvm/MC/MDLInstrInfo.h" +// #include "llvm/Support/MathExtras.h" + +namespace llvm { +namespace mdl { + +/// A SlotDesc represents a single issue slot, containing the entire context +/// of how an instruction is bundled. +SlotDesc::SlotDesc(const MCInst *MC, const MCSubtargetInfo *STI, + const MCInstrInfo *MCII) + : Inst(MC, STI, MCII), + Subunits((*STI->getCpuInfo()->getSubunits())[Inst.getOpcode()]), + SubunitId(0) {} +SlotDesc::SlotDesc(MachineInstr *MI, const TargetSubtargetInfo *STI) + : Inst(MI, STI), + Subunits((*STI->getCpuInfo()->getSubunits())[Inst.getOpcode()]), + SubunitId(0) {} + +/// The Instr object provides a common interface to the MDL compiler for +/// accessing information in EITHER MachineInstrs and MCInsts. +Instr::Instr(const MachineInstr *MI, const TargetSubtargetInfo *STI) + : MI(MI), TII(STI->getInstrInfo()), Cpu(STI->getCpuInfo()) {} +Instr::Instr(const MachineInstr *MI, const TargetInstrInfo *TII, CpuInfo *Cpu) + : MI(MI), TII(TII), Cpu(Cpu) {} + +Instr::Instr(const MCInst *MC, const MCSubtargetInfo *STI, + const MCInstrInfo *MCII) + : MC(MC), STI(STI), MCII(MCII), Cpu(STI->getCpuInfo()) {} + +// Get the LLVM name for this instruction. +std::string Instr::getName() { return TII->getName(getOpcode()).str(); } +// Get the opcode for an instruction. +int Instr::getOpcode() { return isMC() ? MC->getOpcode() : MI->getOpcode(); } + +// Evaluate a Target library instruction predicate for this instruction. +bool Instr::evaluatePredicate(int PredId) { + return Cpu->evaluatePredicate(PredId, this); +} + +// Return the set of subunits for an instruction and CPU combination. +SubunitVec *Instr::getSubunit() { return Cpu->getSubunit(getOpcode()); } + +/// Return the raw bits associated with an operand. +int64_t Instr::getOperand(int OperandIndex) { + if (isMC()) { + const MCOperand &MO = MC->getOperand(OperandIndex); + if (MO.isImm()) + return MO.getImm(); + if (MO.isSFPImm()) + return MO.getSFPImm(); + if (MO.isDFPImm()) + return MO.getDFPImm(); + if (MO.isReg()) + return MO.getReg(); + } else { + const MachineOperand &MO = MI->getOperand(OperandIndex); + if (MO.isImm()) + return MO.getImm(); + if (MO.isFPImm()) + return llvm::bit_cast( + MO.getFPImm()->getValueAPF().convertToFloat()); + if (MO.isReg()) + return MO.getReg(); + } + return 0; +} + +// Return true if a specific operand is a literal (immediate of some form). +bool Instr::isOpndLiteral(int OperandIndex) { + if (isMC()) { + const MCOperand &MO = MC->getOperand(OperandIndex); + return MO.isImm() || MO.isSFPImm() || MO.isDFPImm(); + } else { + const MachineOperand &MO = MI->getOperand(OperandIndex); + return MO.isImm() || MO.isFPImm(); + } +} + +// Return true if a specific operand is a relocatable address. +bool Instr::isOpndAddress(int OperandIndex) { + if (isMC()) + return MC->getOperand(OperandIndex).isExpr(); + else + return MI->getOperand(OperandIndex).isGlobal() || + MI->getOperand(OperandIndex).isMBB(); +} + +// Return true if a specific operand is a code label. 
+bool Instr::isOpndLabel(int OperandIndex) { + if (isMC()) + return MC->getOperand(OperandIndex).isExpr(); + else + return MI->getOperand(OperandIndex).isMBB(); +} + +// Return true if a specific operand is a register. +bool Instr::isOpndRegister(int OperandIndex) { + if (isMC()) + return MC->getOperand(OperandIndex).isReg(); + else + return MI->getOperand(OperandIndex).isReg(); +} + +// Return true if a specific operand is a virtual register. +bool Instr::isOpndVirtualRegister(int OperandIndex) { + if (isMC()) { + const MCOperand &MO = MC->getOperand(OperandIndex); + if (!MO.isReg()) + return false; + return false; // MO.getReg().isVirtual(); + } else { + const MachineOperand &MO = MI->getOperand(OperandIndex); + if (!MO.isReg()) + return false; + return MO.getReg().isVirtual(); + } +} + +/// Return true if a MachineInstr has more operands than described in its +/// MCInst description. +bool Instr::hasExtraOperands() { + if (isMC()) + return false; + return MI->getNumOperands() != MI->getDesc().getNumOperands(); +} + +/// Since LLVM may arbitrarily insert and append operands onto instruction +/// instances, we can't always rely on the operand indexes that the model +/// provides. To deal with this, we create a sorted list of the defs or +/// uses for an instruction, then determine if the requested operand has +/// information, or not. +/// NOTE: This isn't as onerous as it sounds: operand insertions are rare, +/// and typically instructions only have a few explicit references. +inline OperandRef const *findOrderedReference(Instr *Inst, ReferenceType Type, + int OpndId, OperandRefVec *Refs) { + // Find the set of defs OR uses for this instruction, and sort them by + // operand index and pipeline phase. We want the latest defs and the + // earliest uses, so that when we're searching the sorted list below, + // we find the right reference first. + std::vector FoundRefs; + for (const auto &Opnd : ReferenceIter(Refs, Inst)) + if (Opnd.getType() == Type) + FoundRefs.push_back(&Opnd); + + int RefIdx = 0; + if (Type == ReferenceTypes::RefDef) { + std::stable_sort(FoundRefs.begin(), FoundRefs.end(), + [Inst](const OperandRef *a, const OperandRef *b) { + return a->getOperandIndex() < b->getOperandIndex() || + (a->getOperandIndex() == b->getOperandIndex() && + a->getPhase(Inst) > b->getPhase(Inst)); + }); + for (int i = 0; i < OpndId; i++) { + const MachineOperand &MO = Inst->getMI()->getOperand(i); + if (MO.isReg() && MO.isDef()) + RefIdx++; + } + } else { + std::stable_sort(FoundRefs.begin(), FoundRefs.end(), + [Inst](const OperandRef *a, const OperandRef *b) { + return a->getOperandIndex() < b->getOperandIndex() || + (a->getOperandIndex() == b->getOperandIndex() && + a->getPhase(Inst) < b->getPhase(Inst)); + }); + for (int i = 0; i < OpndId; i++) { + const MachineOperand &MO = Inst->getMI()->getOperand(i); + if (MO.isReg() && MO.readsReg() && !MO.isDef()) + RefIdx++; + } + } + + // Find the "nth" operand id in the sorted reference list and return it. + int LastIndex = -1; + int Seen = 0; + for (const auto *Item : FoundRefs) + if (Item->getOperandIndex() != LastIndex) { + if (++Seen > RefIdx) + return Item; + else + LastIndex = Item->getOperandIndex(); + } + + return nullptr; +} + +/// Given two references, return the "best" one. In general, we want the +/// latest defs and the earliest uses. 
+static const OperandRef *bestRef(const OperandRef *Best, const OperandRef *Item, + Instr *Inst) { + if (Best == nullptr) + return Item; + if (Best->getType() == ReferenceTypes::RefDef && + Item->getPhase(Inst) > Best->getPhase(Inst)) + return Item; + if (Best->getType() == ReferenceTypes::RefUse && + Item->getPhase(Inst) < Best->getPhase(Inst)) + return Item; + return Best; +} + +/// Search an operand reference list for a reference to a particular operand. +inline OperandRef const *findReference(Instr *Inst, ReferenceType Type, + int OpndId, OperandRefVec *Refs) { + if (Inst->hasExtraOperands()) + return findOrderedReference(Inst, Type, OpndId, Refs); + + const OperandRef *BestRef = nullptr; + for (const auto &Opnd : ReferenceIter(Refs, Inst)) + if (Opnd.getType() == Type && Opnd.getOperandIndex() == OpndId) + BestRef = bestRef(BestRef, &Opnd, Inst); + + return BestRef; // Return the best reference found, or nullptr. +} + +// Given two sets of resource references, look up the forwarding adjustment +// in the forwarding table. +// TODO: When there more than one functional unit, we need a heuristic +// to determine if forwarding occurs. +int calculateForwardingAdjustment(CpuInfo &Cpu, Instr *Def, Subunit &DefUnit, + Instr *Use, Subunit &UseUnit) { + // No point doing this if there isn't a forwarding table. + auto *FwdTable = Cpu.getForwardTable(); + if (FwdTable == nullptr) + return 0; // No forwarding table + + // Find the first functional unit resource for the Def instruction. + int DefFuId = -1; + if (auto *DefRes = DefUnit.getUsedResourceReferences()) + for (auto &DefFu : ReferenceIter(DefRes, Def)) + if (DefFu.isFus() || Cpu.isFuncUnitId(DefFu.getResourceId())) { + DefFuId = DefFu.getResourceId(); + break; + } + + // If we didn't find a functional unit in resources, check resource pools. + // We'll take the first member of the first pool we find, with the assumption + // that all the functional units in the pool have similar forwarding. + if (DefFuId == -1) { + if (auto *DefRes = DefUnit.getPooledResourceReferences()) + for (auto &DefFu : ReferenceIter(DefRes, Def)) + if (DefFu.isFus()) { + DefFuId = DefFu.getResourceIds()[0]; + break; + } + } + if (DefFuId == -1) + return 0; + + // Find the first functional unit resource for the Use instruction. + int UseFuId = -1; + if (auto *UseRes = UseUnit.getUsedResourceReferences()) + for (auto &UseFu : ReferenceIter(UseRes, Use)) + if (UseFu.isFus() || Cpu.isFuncUnitId(UseFu.getResourceId())) { + UseFuId = UseFu.getResourceId(); + break; + } + + // If we didn't find a functional unit in resources, check resource pools. + // We'll take the first member of the first pool we find, with the assumption + // that all the functional units in the pool have similar forwarding. + if (UseFuId == -1) { + if (auto *UseRes = UseUnit.getPooledResourceReferences()) + for (auto &UseFu : ReferenceIter(UseRes, Use)) + if (UseFu.isFus()) { + UseFuId = UseFu.getResourceIds()[0]; + break; + } + } + if (UseFuId == -1) + return 0; + + return FwdTable[DefFuId][UseFuId]; +} + +// This is essentially a clone of MachineInstr::isTransient, except that it +// doesn't depend on code in the CodeGen library, and doesn't handle bundles. 
+static bool isTransient(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: + return MI->getDesc().getFlags() & (1 << MCID::Meta); + case TargetOpcode::PHI: + case TargetOpcode::G_PHI: + case TargetOpcode::COPY: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + return true; + } +} +// This is essentially a clone of MachineInstr::mayLoad, except that it +// doesn't depend on code in the CodeGen library, and doesn't handle bundles. +static bool mayLoad(const MachineInstr *MI) { + return MI->getDesc().getFlags() & (1 << MCID::MayLoad); +} + +// Return a default expected latency for a Def'ed register which has no +// MDL information about its latency. Note that this simply replicates +// the "normal" TargetInstrInfo default latency api. +static unsigned defaultDefLatency(Instr *Def, CpuInfo &Cpu) { + if (auto *DefMI = Def->getMI()) { + if (isTransient(DefMI)) + return 0; + if (mayLoad(DefMI)) + return Cpu.getLoadPhase(); + if (Def->getTII()->isHighLatencyDef(Def->getOpcode())) + return Cpu.getHighLatencyDefPhase(); + } + return 1; +} + +/// Calculate the latency between two instructions' operands. +/// This function will always receive a Def, but doesn't always get a Use. +/// For Defs or Uses, if we don't have a subunit description, or the subunit +/// doesn't mention the operand, we use the earliest reference cycle for +/// the current CPU. +int calculateOperandLatency(Instr *Def, unsigned DefOpId, Instr *Use, + unsigned UseOpId) { + CpuInfo &Cpu = *Def->getCpuInfo(); + + int DefPhase = -1; + int UsePhase = Cpu.getEarlyUsePhase(); // Default use stage + int FwdAdjust = 0; + + int DefSuId = Def->getSubunitId(); + int UseSuId = Use ? Use->getSubunitId() : 0; + SubunitVec *DefSubunit = nullptr; + SubunitVec *UseSubunit = nullptr; + + if (Cpu.IsInstruction(Def->getOpcode(), DefOpId)) + if ((DefSubunit = Def->getSubunit())) + if (auto *DefRefs = (*DefSubunit)[DefSuId].getOperandReferences()) + if (auto *DefRef = + findReference(Def, ReferenceTypes::RefDef, DefOpId, DefRefs)) + DefPhase = DefRef->getPhase(Def); + + // If we don't find the def, choose a reasonable latency. + if (DefPhase == -1) + return defaultDefLatency(Def, Cpu); + + // Find the phase for a Use instruction, if provided. + if (Use && Cpu.IsInstruction(Use->getOpcode(), UseOpId)) + if ((UseSubunit = Use->getSubunit())) + if (auto *UseRefs = (*UseSubunit)[UseSuId].getOperandReferences()) + if (auto *UseRef = + findReference(Use, ReferenceTypes::RefUse, UseOpId, UseRefs)) + UsePhase = UseRef->getPhase(Use); + + // If we have subunits for the def and the use, check for forwarding + // information and adjust the latency. If we can't identify a subunit, we + // can't know anything about forwarding. + if (DefSubunit && UseSubunit) { + auto &UseUnit = (*UseSubunit)[UseSuId]; + auto &DefUnit = (*DefSubunit)[DefSuId]; + FwdAdjust = calculateForwardingAdjustment(Cpu, Def, DefUnit, Use, UseUnit); + } + + return std::max(DefPhase - UsePhase - FwdAdjust + 1, 0); +} + +/// Wrapper for MachineInstr Objects. +/// This function will always receive a Def, but doesn't always get a Use. 
+int calculateOperandLatency(const MachineInstr *Def, unsigned DefOpId, + const MachineInstr *Use, unsigned UseOpId, + const TargetSubtargetInfo *STI) { + Instr instDef(Def, STI); + if (Use) { + Instr instUse(Use, STI); + return calculateOperandLatency(&instDef, DefOpId, &instUse, UseOpId); + } + return calculateOperandLatency(&instDef, DefOpId, nullptr, UseOpId); +} + +/// Search a list of operand references for the maximum latency. +inline int findMaxLatency(Instr *Inst, OperandRefVec *Refs) { + int Max = -1; + if (Refs == nullptr) + return Max; + for (auto &Ref : ReferenceIter(Refs, Inst)) + Max = std::max(Max, Ref.getPhase(Inst)); + + return Max; +} + +/// Find the maximum latency of an instruction based on operand references. +int calculateInstructionLatency(Instr *Inst) { + CpuInfo &Cpu = *Inst->getCpuInfo(); + if (!Cpu.IsInstruction(Inst->getOpcode(), 0)) + return 0; + + // Handle instructions that don't have subunit information, or whose subunits + // don't mention all operands. If the instructions were annotated with + // assigned subunits, we wouldn't have to check all of them. + int Max = Cpu.getEarlyUsePhase(); + if (auto *Subunits = Inst->getSubunit()) + for (auto &Subunit : *Subunits) + Max = std::max(Max, findMaxLatency(Inst, Subunit.getOperandReferences())); + return std::max(Max - static_cast(Cpu.getEarlyUsePhase()) + 1, 0); +} + +/// Wrapper for MCInst objects. +int calculateInstructionLatency(const MCInst *Inst, const MCSubtargetInfo *STI, + const MCInstrInfo *MCII) { + Instr Ins(Inst, STI, MCII); + return calculateInstructionLatency(&Ins); +} + +/// Wrapper for MachineInstr objects. +int calculateInstructionLatency(const MachineInstr *Inst, + const TargetSubtargetInfo *STI) { + + Instr Ins(Inst, STI); + return calculateInstructionLatency(&Ins); +} + +/// Calculate the latency between two instructions that hold or reserve the +/// same resource. +int calculateHazardLatency(Instr *Reserve, Instr *Hold) { + auto *ReserveSu = Reserve->getSubunit(); + auto *HoldSu = Hold->getSubunit(); + if (ReserveSu == nullptr || HoldSu == nullptr) + return -1; + + int ResId = Reserve->getSubunitId(); + int HoldId = Hold->getSubunitId(); + auto *ReserveRefs = (*ReserveSu)[ResId].getReservedResourceReferences(); + auto *HoldRefs = (*HoldSu)[HoldId].getHeldResourceReferences(); + if (HoldRefs == nullptr || ReserveRefs == nullptr) + return -1; + + int MaxLatency = -1; + for (auto const &ResRef : *ReserveRefs) { + for (auto const &HoldRef : *HoldRefs) { + if (ResRef.getResourceId() == HoldRef.getResourceId()) { + int Latency = ResRef.getPhase(Reserve) - HoldRef.getPhase(Hold) + 1; + MaxLatency = std::max(MaxLatency, std::max(Latency, 0)); + } + } + } + return MaxLatency; +} + +/// Wrapper for MCInst objects. +int calculateHazardLatency(const MCInst *Reserve, const MCInst *Hold, + const MCSubtargetInfo *STI, + const MCInstrInfo *MCII) { + Instr ReserveInst(Reserve, STI, MCII); + Instr HoldInst(Hold, STI, MCII); + return calculateHazardLatency(&ReserveInst, &HoldInst); +} + +/// Wrapper for MachineInstr objects. 
+int calculateHazardLatency(MachineInstr *Reserve, MachineInstr *Hold, + TargetSubtargetInfo *STI) { + Instr ReserveInst(Reserve, STI); + Instr HoldInst(Hold, STI); + return calculateHazardLatency(&ReserveInst, &HoldInst); +} + +} // namespace mdl +} // namespace llvm diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -39,6 +39,7 @@ computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); } +// TODO-MDL - It appears that this isn't necessary for MDL-based models static void initializeUsedResources(InstrDesc &ID, const MCSchedClassDesc &SCDesc, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/Config/config.h" #include "llvm/IR/GlobalValue.h" #include "llvm/TargetParser/AArch64TargetParser.h" @@ -34,6 +35,16 @@ #define GET_SUBTARGETINFO_TARGET_DESC #include "AArch64GenSubtargetInfo.inc" +// Include definitions associated with the MDL description. +#if ENABLE_MDL_USE +#include "AArch64GenMdlInfo.h" +// Include virtual predicate function definitions from the MDL description. +#include "AArch64GenMdlTarget.inc" +#define AArch64CpuTable &AArch64::CpuTable +#else +#define AArch64CpuTable nullptr +#endif + static cl::opt EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " "converter pass"), cl::init(true), cl::Hidden); @@ -294,12 +305,11 @@ unsigned MaxSVEVectorSizeInBitsOverride, bool StreamingSVEMode, bool StreamingCompatibleSVEMode) - : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS), + : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS, AArch64CpuTable), ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()), ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()), CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()), - IsLittle(LittleEndian), - StreamingSVEMode(StreamingSVEMode), + IsLittle(LittleEndian), StreamingSVEMode(StreamingSVEMode), StreamingCompatibleSVEMode(StreamingCompatibleSVEMode), MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride), MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT), @@ -329,6 +339,10 @@ if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i))) ReserveXRegisterForRA.set(i); } + // Register the MDL Target-library-specific predicate table in the cpu table. +#if ENABLE_MDL_USE + AArch64::CpuTable.SetInstrPredicates(&AArch64::InstrPredicates); +#endif // X30 is named LR, so we can't use TRI->getName to check X30.
if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR")) ReserveXRegisterForRA.set(30); diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -26,9 +26,51 @@ tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM AArch64GenSystemOperands.inc -gen-searchable-tables) tablegen(LLVM AArch64GenExegesis.inc -gen-exegesis) +tablegen(LLVM AArch64.txt -print-records) add_public_tablegen_target(AArch64CommonTableGen) +############### Begin Mdl +if(LLVM_ENABLE_MDL) + add_custom_target(TdScanAArch64 + COMMAND tdscan -gen_arch_spec --nowarnings AArch64.txt + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating AArch64.mdl..." + DEPENDS AArch64CommonTableGen + VERBATIM) + + set(MdlDatabaseAArch64 + "${CMAKE_CURRENT_BINARY_DIR}/AArch64GenMdlInfo.inc" + "${CMAKE_CURRENT_BINARY_DIR}/AArch64GenMdlInfo.h" + "${CMAKE_CURRENT_BINARY_DIR}/AArch64GenMdlTarget.inc" + ) + + add_custom_command(OUTPUT ${MdlDatabaseAArch64} + COMMAND mdl AArch64.mdl + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating AArch64GenMdlInfo.inc..." + DEPENDS TdScanAArch64 + VERBATIM) + + # tablegen macro + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${MdlDatabaseAArch64}) + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/AArch64GenMdlInfo.inc") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/AArch64GenMdlInfo.h") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/AArch64GenMdlTarget.inc") + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${MdlDatabaseAArch64} PROPERTIES GENERATED 1) + + # add_public_tablegen_target macro + add_custom_target(MdlAArch64 DEPENDS ${TABLEGEN_OUTPUT}) + + if(LLVM_COMMON_DEPENDS) + add_dependencies(MdlAArch64 ${LLVM_COMMON_DEPENDS}) + endif() + + set_target_properties(MdlAArch64 PROPERTIES FOLDER "MdlGeneration") + set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} MdlAArch64) +endif() +############### End Mdl + add_llvm_target(AArch64CodeGen GISel/AArch64CallLowering.cpp GISel/AArch64GlobalISelUtils.cpp diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64InstPrinter.h" #include "TargetInfo/AArch64TargetInfo.h" +#include "llvm/Config/config.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCCodeEmitter.h" @@ -43,6 +44,14 @@ #define GET_REGINFO_MC_DESC #include "AArch64GenRegisterInfo.inc" +// Include the generated MDL database. 
+#if ENABLE_MDL_USE +#include "AArch64GenMdlInfo.inc" +#define AArch64CpuTable &AArch64::CpuTable +#else +#define AArch64CpuTable nullptr +#endif + static MCInstrInfo *createAArch64MCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitAArch64MCInstrInfo(X); @@ -60,7 +69,8 @@ CPU = "apple-a12"; } - return createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, + AArch64CpuTable); } void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/Config/config.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/MDBuilder.h" @@ -40,6 +41,16 @@ #include "AMDGPUGenSubtargetInfo.inc" #undef AMDGPUSubtarget +// Include definitions associated with the MDL description. +#if ENABLE_MDL_USE +#include "AMDGPUGenMdlInfo.h" +// Include virtual predicate function definitions from the MDL description. +#include "AMDGPUGenMdlTarget.inc" +#define AMDGPUCpuTable &AMDGPU::CpuTable +#else +#define AMDGPUCpuTable nullptr +#endif + static cl::opt EnablePowerSched( "amdgpu-enable-power-sched", cl::desc("Enable scheduling to minimize mAI power bursts"), @@ -169,7 +180,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM) : // clang-format off - AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), + AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS, AMDGPUCpuTable), AMDGPUSubtarget(TT), TargetTriple(TT), TargetID(*this), @@ -186,6 +197,11 @@ RegBankInfo.reset(new AMDGPURegisterBankInfo(*this)); InstSelector.reset(new AMDGPUInstructionSelector( *this, *static_cast(RegBankInfo.get()), TM)); + + // Register the Target-library-specific predicate table in the cpu table. +#if ENABLE_MDL_USE + AMDGPU::CpuTable.SetInstrPredicates(&AMDGPU::InstrPredicates); +#endif } unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const { diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -14,6 +14,7 @@ tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info) tablegen(LLVM AMDGPUGenSearchableTables.inc -gen-searchable-tables) tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM AMDGPU.txt -print-records) set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td) tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel) @@ -33,6 +34,7 @@ tablegen(LLVM R600GenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM R600GenRegisterInfo.inc -gen-register-info) tablegen(LLVM R600GenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM R600.txt -print-records) add_public_tablegen_target(AMDGPUCommonTableGen) @@ -40,6 +42,83 @@ tablegen(LLVM InstCombineTables.inc -gen-searchable-tables) add_public_tablegen_target(InstCombineTableGen) +############### Begin Mdl +if(LLVM_ENABLE_MDL) + add_custom_target(TdScanAMDGPU + COMMAND tdscan -gen_arch_spec --nowarnings AMDGPU.txt + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating AMDGPU.mdl..." 
+ DEPENDS AMDGPUCommonTableGen + VERBATIM) + + set(MdlDatabaseAMDGPU + "${CMAKE_CURRENT_BINARY_DIR}/AMDGPUGenMdlInfo.inc" + "${CMAKE_CURRENT_BINARY_DIR}/AMDGPUGenMdlInfo.h" + "${CMAKE_CURRENT_BINARY_DIR}/AMDGPUGenMdlTarget.inc" + ) + + add_custom_command(OUTPUT ${MdlDatabaseAMDGPU} + COMMAND mdl AMDGPU.mdl + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating AMDGPUGenMdlInfo.inc..." + DEPENDS TdScanAMDGPU + VERBATIM) + + # Generate MDL for R600 + add_custom_target(TdScanR600 + COMMAND tdscan -gen_arch_spec --nowarnings R600.txt + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating R600.mdl..." + DEPENDS AMDGPUCommonTableGen + VERBATIM) + + set(MdlDatabaseR600 + "${CMAKE_CURRENT_BINARY_DIR}/R600GenMdlInfo.inc" + "${CMAKE_CURRENT_BINARY_DIR}/R600GenMdlInfo.h" + "${CMAKE_CURRENT_BINARY_DIR}/R600GenMdlTarget.inc" + ) + + add_custom_command(OUTPUT ${MdlDatabaseR600} + COMMAND mdl R600.mdl + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating R600GenMdlInfo.inc..." + DEPENDS TdScanR600 + VERBATIM) + + # tablegen macro + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${MdlDatabaseAMDGPU} ${MdlDatabaseR600}) + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} + "${CMAKE_CURRENT_BINARY_DIR}/AMDGPUGenMdlInfo.inc") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} + "${CMAKE_CURRENT_BINARY_DIR}/AMDGPUGenMdlInfo.h") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} + "${CMAKE_CURRENT_BINARY_DIR}/AMDGPUGenMdlTarget.inc") + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${MdlDatabaseAMDGPU} + PROPERTIES GENERATED 1) + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} + "${CMAKE_CURRENT_BINARY_DIR}/R600GenMdlInfo.inc") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} + "${CMAKE_CURRENT_BINARY_DIR}/R600GenMdlInfo.h") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} + "${CMAKE_CURRENT_BINARY_DIR}/R600GenMdlTarget.inc") + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${MdlDatabaseR600} + PROPERTIES GENERATED 1) + + # add_public_tablegen_target macro + add_custom_target(MdlAMDGPU DEPENDS ${TABLEGEN_OUTPUT}) + add_custom_target(MdlR600 DEPENDS ${TABLEGEN_OUTPUT}) + + if(LLVM_COMMON_DEPENDS) + add_dependencies(MdlAMDGPU ${LLVM_COMMON_DEPENDS}) + add_dependencies(MdlR600 ${LLVM_COMMON_DEPENDS}) + endif() + + set_target_properties(MdlAMDGPU PROPERTIES FOLDER "MdlGeneration") + set_target_properties(MdlR600 PROPERTIES FOLDER "MdlGeneration") + set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} MdlAMDGPU MdlR600) +endif() +############### End Mdl + add_llvm_target(AMDGPUCodeGen AMDGPUAliasAnalysis.cpp AMDGPUAlwaysInlinePass.cpp diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -19,6 +19,7 @@ #include "R600InstPrinter.h" #include "R600MCTargetDesc.h" #include "TargetInfo/AMDGPUTargetInfo.h" +#include "llvm/Config/config.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCCodeEmitter.h" @@ -53,6 +54,14 @@ #define GET_REGINFO_MC_DESC #include "R600GenRegisterInfo.inc" +// Include the generated MDL database. 
+#if ENABLE_MDL_USE +#include "AMDGPUGenMdlInfo.inc" +#define AMDGPUCpuTable &AMDGPU::CpuTable +#else +#define AMDGPUCpuTable nullptr +#endif + static MCInstrInfo *createAMDGPUMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitAMDGPUMCInstrInfo(X); @@ -77,8 +86,10 @@ static MCSubtargetInfo * createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { if (TT.getArch() == Triple::r600) - return createR600MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); - return createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createR600MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, + AMDGPUCpuTable); + return createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, + AMDGPUCpuTable); } static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "R600MCTargetDesc.h" +#include "llvm/Config/config.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/TargetParser/SubtargetFeature.h" @@ -21,6 +22,11 @@ #define ENABLE_INSTR_PREDICATE_VERIFIER #include "R600GenInstrInfo.inc" +// Include the generated MDL database +#if ENABLE_MDL_USE +#include "R600GenMdlInfo.inc" +#endif + MCInstrInfo *llvm::createR600MCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitR600MCInstrInfo(X); diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.cpp b/llvm/lib/Target/AMDGPU/R600Subtarget.cpp --- a/llvm/lib/Target/AMDGPU/R600Subtarget.cpp +++ b/llvm/lib/Target/AMDGPU/R600Subtarget.cpp @@ -13,6 +13,7 @@ #include "R600Subtarget.h" #include "MCTargetDesc/R600MCTargetDesc.h" +#include "llvm/Config/config.h" using namespace llvm; @@ -22,14 +23,26 @@ #define GET_SUBTARGETINFO_CTOR #include "R600GenSubtargetInfo.inc" +// Include definitions associated with the MDL description. +#if ENABLE_MDL_USE +#include "R600GenMdlInfo.h" +#define R600CpuTable &R600::CpuTable +#else +#define R600CpuTable nullptr +#endif + R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) - : R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), AMDGPUSubtarget(TT), - InstrInfo(*this), + : R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS, R600CpuTable), + AMDGPUSubtarget(TT), InstrInfo(*this), FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)), InstrItins(getInstrItineraryForCPU(GPU)) { + // Note we don't need to register InstrPredicates for R600. + AddressableLocalMemorySize = LocalMemorySize; + + // Note we don't need to register InstrPredicates for R600. 
} R600Subtarget &R600Subtarget::initializeSubtargetDependencies(const Triple &TT, diff --git a/llvm/lib/Target/ARC/ARCSubtarget.cpp b/llvm/lib/Target/ARC/ARCSubtarget.cpp --- a/llvm/lib/Target/ARC/ARCSubtarget.cpp +++ b/llvm/lib/Target/ARC/ARCSubtarget.cpp @@ -26,5 +26,5 @@ ARCSubtarget::ARCSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : ARCGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), InstrInfo(*this), - FrameLowering(*this), TLInfo(TM, *this) {} + : ARCGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS, nullptr), + InstrInfo(*this), FrameLowering(*this), TLInfo(TM, *this) {} diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp --- a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp +++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp @@ -49,7 +49,7 @@ static MCSubtargetInfo *createARCMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - return createARCMCSubtargetInfoImpl(TT, CPU, /*TuneCPU=*/CPU, FS); + return createARCMCSubtargetInfoImpl(TT, CPU, /*TuneCPU=*/CPU, FS, nullptr); } static MCAsmInfo *createARCMCAsmInfo(const MCRegisterInfo &MRI, diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Config/config.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" @@ -46,6 +47,16 @@ #define GET_SUBTARGETINFO_CTOR #include "ARMGenSubtargetInfo.inc" +// Include definitions associated with the MDL description. +#if ENABLE_MDL_USE +#include "ARMGenMdlInfo.h" +// Include virtual predicate function definitions from the MDL description. +#include "ARMGenMdlTarget.inc" +#define ARMCpuTable &ARM::CpuTable +#else +#define ARMCpuTable nullptr +#endif + static cl::opt UseFusedMulOps("arm-use-mulops", cl::init(true), cl::Hidden); @@ -93,19 +104,16 @@ const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle, bool MinSize) - : ARMGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), + : ARMGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS, ARMCpuTable), UseMulOps(UseFusedMulOps), CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. - InstrInfo(isThumb1Only() - ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) - : !isThumb() - ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) - : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), + InstrInfo(isThumb1Only() ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) + : !isThumb() ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) + : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), TLInfo(TM, *this) { - CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering())); Legalizer.reset(new ARMLegalizerInfo(*this)); @@ -118,6 +126,11 @@ *static_cast(&TM), *this, *RBI)); RegBankInfo.reset(RBI); + + // Register the Target-library-specific predicate table in the cpu table. 
+#if ENABLE_MDL_USE + ARM::CpuTable.SetInstrPredicates(&ARM::InstrPredicates); +#endif } const CallLowering *ARMSubtarget::getCallLowering() const { diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt --- a/llvm/lib/Target/ARM/CMakeLists.txt +++ b/llvm/lib/Target/ARM/CMakeLists.txt @@ -16,9 +16,51 @@ tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info) tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM ARMGenSystemRegister.inc -gen-searchable-tables) +tablegen(LLVM ARM.txt -print-records) add_public_tablegen_target(ARMCommonTableGen) +############### Begin Mdl +if(LLVM_ENABLE_MDL) + add_custom_target(TdScanARM + COMMAND tdscan -gen_arch_spec --nowarnings ARM.txt + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating ARM.mdl..." + DEPENDS ARMCommonTableGen + VERBATIM) + + set(MdlDatabaseARM + "${CMAKE_CURRENT_BINARY_DIR}/ARMGenMdlInfo.inc" + "${CMAKE_CURRENT_BINARY_DIR}/ARMGenMdlInfo.h" + "${CMAKE_CURRENT_BINARY_DIR}/ARMGenMdlTarget.inc" + ) + + add_custom_command(OUTPUT ${MdlDatabaseARM} + COMMAND mdl ARM.mdl + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating ARMGenMdlInfo.inc..." + DEPENDS TdScanARM + VERBATIM) + + # tablegen macro + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${MdlDatabaseARM}) + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/ARMGenMdlInfo.inc") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/ARMGenMdlInfo.h") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/ARMGenMdlTarget.inc") + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${MdlDatabaseARM} PROPERTIES GENERATED 1) + + # add_public_tablegen_target macro + add_custom_target(MdlARM DEPENDS ${TABLEGEN_OUTPUT}) + + if(LLVM_COMMON_DEPENDS) + add_dependencies(MdlARM ${LLVM_COMMON_DEPENDS}) + endif() + + set_target_properties(MdlARM PROPERTIES FOLDER "MdlGeneration") + set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} MdlARM) +endif() +############### End Mdl + add_llvm_target(ARMCodeGen A15SDOptimizer.cpp ARMAsmPrinter.cpp diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -13,9 +13,11 @@ #include "ARMMCTargetDesc.h" #include "ARMAddressingModes.h" #include "ARMBaseInfo.h" +#include "ARMBaseInstrInfo.h" #include "ARMInstPrinter.h" #include "ARMMCAsmInfo.h" #include "TargetInfo/ARMTargetInfo.h" +#include "llvm/Config/config.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCCodeEmitter.h" @@ -35,6 +37,14 @@ #define GET_REGINFO_MC_DESC #include "ARMGenRegisterInfo.inc" +// Include the generated MDL database. 
+#if ENABLE_MDL_USE +#include "ARMGenMdlInfo.inc" +#define ARMCpuTable &ARM::CpuTable +#else +#define ARMCpuTable nullptr +#endif + static bool getMCRDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { if (STI.hasFeature(llvm::ARM::HasV7Ops) && @@ -211,7 +221,8 @@ ArchFS = std::string(FS); } - return createARMMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, ArchFS); + return createARMMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, ArchFS, + ARMCpuTable); } static MCInstrInfo *createARMMCInstrInfo() { diff --git a/llvm/lib/Target/AVR/AVRSubtarget.cpp b/llvm/lib/Target/AVR/AVRSubtarget.cpp --- a/llvm/lib/Target/AVR/AVRSubtarget.cpp +++ b/llvm/lib/Target/AVR/AVRSubtarget.cpp @@ -29,7 +29,8 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const AVRTargetMachine &TM) - : AVRGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(*this), + : AVRGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS, nullptr), + InstrInfo(*this), TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)) { // Parse features string. ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS); diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp @@ -54,7 +54,7 @@ static MCSubtargetInfo *createAVRMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - return createAVRMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createAVRMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, nullptr); } static MCInstPrinter *createAVRMCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp --- a/llvm/lib/Target/BPF/BPFSubtarget.cpp +++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp @@ -91,6 +91,6 @@ BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : BPFGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), + : BPFGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS, nullptr), FrameLowering(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {} diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -47,7 +47,7 @@ static MCSubtargetInfo *createBPFMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - return createBPFMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createBPFMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, nullptr); } static MCStreamer *createBPFMCStreamer(const Triple &T, MCContext &Ctx, diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.cpp b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp --- a/llvm/lib/Target/CSKY/CSKYSubtarget.cpp +++ b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp @@ -89,7 +89,7 @@ CSKYSubtarget::CSKYSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, const TargetMachine &TM) - : CSKYGenSubtargetInfo(TT, CPU, TuneCPU, FS), + : CSKYGenSubtargetInfo(TT, CPU, TuneCPU, FS, nullptr), FrameLowering(initializeSubtargetDependencies(TT, CPU, TuneCPU, FS)), InstrInfo(*this), RegInfo(), TLInfo(TM, *this) {} diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp +++ 
b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp @@ -74,7 +74,8 @@ std::string CPUName = std::string(CPU); if (CPUName.empty()) CPUName = "generic"; - return createCSKYMCSubtargetInfoImpl(TT, CPUName, /*TuneCPU=*/CPUName, FS); + return createCSKYMCSubtargetInfoImpl(TT, CPUName, /*TuneCPU=*/CPUName, FS, + nullptr); } static MCTargetStreamer * diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -12,9 +12,51 @@ tablegen(LLVM HexagonGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info) tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM Hexagon.txt -print-records) add_public_tablegen_target(HexagonCommonTableGen) +############### Begin Mdl +if(LLVM_ENABLE_MDL) + add_custom_target(TdScanHexagon + COMMAND tdscan -gen_arch_spec --nowarnings Hexagon.txt + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating Hexagon.mdl..." + DEPENDS HexagonCommonTableGen + VERBATIM) + + set(MdlDatabaseHexagon + "${CMAKE_CURRENT_BINARY_DIR}/HexagonGenMdlInfo.inc" + "${CMAKE_CURRENT_BINARY_DIR}/HexagonGenMdlInfo.h" + "${CMAKE_CURRENT_BINARY_DIR}/HexagonGenMdlTarget.inc" + ) + + add_custom_command(OUTPUT ${MdlDatabaseHexagon} + COMMAND mdl Hexagon.mdl + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating HexagonGenMdlInfo.inc..." + DEPENDS TdScanHexagon + VERBATIM) + + # tablegen macro + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${MdlDatabaseHexagon}) + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/HexagonGenMdlInfo.inc") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/HexagonGenMdlInfo.h") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/HexagonGenMdlTarget.inc") + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${MdlDatabaseHexagon} PROPERTIES GENERATED 1) + + # add_public_tablegen_target macro + add_custom_target(MdlHexagon DEPENDS ${TABLEGEN_OUTPUT}) + + if(LLVM_COMMON_DEPENDS) + add_dependencies(MdlHexagon ${LLVM_COMMON_DEPENDS}) + endif() + + set_target_properties(MdlHexagon PROPERTIES FOLDER "MdlGeneration") + set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} MdlHexagon) +endif() +############### End Mdl + add_llvm_target(HexagonCodeGen BitTracker.cpp HexagonAsmPrinter.cpp diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h --- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -32,7 +32,8 @@ protected: VLIWResourceModel * createVLIWResourceModel(const TargetSubtargetInfo &STI, - const TargetSchedModel *SchedModel) const override; + const TargetSchedModel *SchedModel, + ScheduleHazardRecognizer *HazardRec) const override; int SchedulingCost(ReadyQueue &Q, SUnit *SU, SchedCandidate &Candidate, RegPressureDelta &Delta, bool verbose) override; }; diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp --- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -38,8 +38,9 @@ } VLIWResourceModel *HexagonConvergingVLIWScheduler::createVLIWResourceModel( - const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const { - return new HexagonVLIWResourceModel(STI, SchedModel); + const TargetSubtargetInfo &STI, const TargetSchedModel 
*SchedModel, + ScheduleHazardRecognizer *HazardRec) const { + return new HexagonVLIWResourceModel(STI, SchedModel, HazardRec); } int HexagonConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/Config/config.h" #include "llvm/IR/IntrinsicsHexagon.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -41,6 +42,16 @@ #define GET_SUBTARGETINFO_TARGET_DESC #include "HexagonGenSubtargetInfo.inc" +// Include definitions associated with the MDL description. +#if ENABLE_MDL_USE +#include "HexagonGenMdlInfo.h" +// Include virtual predicate function definitions from the MDL description. +#include "HexagonGenMdlTarget.inc" +#define HexagonCpuTable &Hexagon::CpuTable +#else +#define HexagonCpuTable nullptr +#endif + static cl::opt EnableBSBSched("enable-bsb-sched", cl::Hidden, cl::init(true)); @@ -79,16 +90,22 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) - : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), + : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS, HexagonCpuTable), OptLevel(TM.getOptLevel()), CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), RegInfo(getHwMode()), TLInfo(TM, *this), InstrItins(getInstrItineraryForCPU(CPUString)) { Hexagon_MC::addArchSubtarget(this, FS); + // Beware of the default constructor of InstrItineraryData: it will // reset all members to 0. assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized"); + + // Register the Target-library-specific predicate table in the cpu table. +#if ENABLE_MDL_USE + Hexagon::CpuTable.SetInstrPredicates(&Hexagon::InstrPredicates); +#endif } HexagonSubtarget & diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -292,9 +292,9 @@ // return true, otherwise, return false. bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc()); - bool Avail = ResourceTracker->canReserveResources(*ExtMI); + bool Avail = canReserveResources(*ExtMI); if (Reserve && Avail) - ResourceTracker->reserveResources(*ExtMI); + reserveResources(*ExtMI); MF.deleteMachineInstr(ExtMI); return Avail; } @@ -891,7 +891,7 @@ HII->getDotNewPredOp(MI, MBPI); const MCInstrDesc &D = HII->get(NewOpcode); MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc()); - bool ResourcesAvailable = ResourceTracker->canReserveResources(*NewMI); + bool ResourcesAvailable = canReserveResources(*NewMI); MF.deleteMachineInstr(NewMI); if (!ResourcesAvailable) return false; @@ -1057,6 +1057,9 @@ // We check if MI has any functional units mapped to it. If it doesn't, // we ignore the instruction. 
+ if (HazardRec) + return true; + const MCInstrDesc& TID = MI.getDesc(); auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass()); return !IS->getUnits(); @@ -1723,7 +1726,7 @@ CurrentPacketMIs.push_back(&MI); return MII; } - assert(ResourceTracker->canReserveResources(MI)); + assert(canReserveResources(MI)); bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); bool Good = true; @@ -1734,14 +1737,14 @@ // Either of them can require a constant extender. Try to add both to // the current packet, and if that fails, end the packet and start a // new one. - ResourceTracker->reserveResources(MI); + reserveResources(MI); if (ExtMI) Good = tryAllocateResourcesForConstExt(true); bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI); if (Good) { - if (ResourceTracker->canReserveResources(NvjMI)) - ResourceTracker->reserveResources(NvjMI); + if (canReserveResources(NvjMI)) + reserveResources(NvjMI); else Good = false; } @@ -1750,14 +1753,14 @@ if (!Good) { endPacket(MBB, MI); - assert(ResourceTracker->canReserveResources(MI)); - ResourceTracker->reserveResources(MI); + assert(canReserveResources(MI)); + reserveResources(MI); if (ExtMI) { assert(canReserveResourcesForConstExt()); tryAllocateResourcesForConstExt(true); } - assert(ResourceTracker->canReserveResources(NvjMI)); - ResourceTracker->reserveResources(NvjMI); + assert(canReserveResources(NvjMI)); + reserveResources(NvjMI); if (ExtNvjMI) { assert(canReserveResourcesForConstExt()); reserveResourcesForConstExt(); @@ -1768,7 +1771,7 @@ return MII; } - ResourceTracker->reserveResources(MI); + reserveResources(MI); if (ExtMI && !tryAllocateResourcesForConstExt(true)) { endPacket(MBB, MI); if (PromotedToDotNew) @@ -1777,7 +1780,7 @@ useCalleesSP(MI); GlueAllocframeStore = false; } - ResourceTracker->reserveResources(MI); + reserveResources(MI); reserveResourcesForConstExt(); } @@ -1827,7 +1830,7 @@ PacketHasDuplex = false; PacketHasSLOT0OnlyInsn = false; - ResourceTracker->clearResources(); + clearResources(); LLVM_DEBUG(dbgs() << "End packet\n"); } @@ -1867,7 +1870,7 @@ // with the original opcode. MachineInstr &MIRef = const_cast(MI); MIRef.setDesc(HII->get(Opcode)); - return ResourceTracker->canReserveResources(MIRef); + return canReserveResources(MIRef); } } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/Config/config.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -55,6 +56,14 @@ #define GET_REGINFO_MC_DESC #include "HexagonGenRegisterInfo.inc" +// Include the generated MDL database. 
+#if ENABLE_MDL_USE +#include "HexagonGenMdlInfo.inc" +#define HexagonCpuTable &Hexagon::CpuTable +#else +#define HexagonCpuTable nullptr +#endif + cl::opt llvm::HexagonDisableCompound ("mno-compound", cl::desc("Disable looking for compound instructions for Hexagon")); @@ -538,7 +547,7 @@ StringRef ArchFS = Features.second; MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl( - TT, CPUName, /*TuneCPU*/ CPUName, ArchFS); + TT, CPUName, /*TuneCPU*/ CPUName, ArchFS, HexagonCpuTable); if (X != nullptr && (CPUName == "hexagonv67t" || CPUName == "hexagon71t")) addArchSubtarget(X, ArchFS); diff --git a/llvm/lib/Target/Lanai/CMakeLists.txt b/llvm/lib/Target/Lanai/CMakeLists.txt --- a/llvm/lib/Target/Lanai/CMakeLists.txt +++ b/llvm/lib/Target/Lanai/CMakeLists.txt @@ -11,9 +11,51 @@ tablegen(LLVM LanaiGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM LanaiGenRegisterInfo.inc -gen-register-info) tablegen(LLVM LanaiGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM Lanai.txt -print-records) add_public_tablegen_target(LanaiCommonTableGen) +############### Begin Mdl +if(LLVM_ENABLE_MDL) + add_custom_target(TdScanLanai + COMMAND tdscan -gen_arch_spec --nowarnings Lanai.txt + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating Lanai.mdl..." + DEPENDS LanaiCommonTableGen + VERBATIM) + + set(MdlDatabaseLanai + "${CMAKE_CURRENT_BINARY_DIR}/LanaiGenMdlInfo.inc" + "${CMAKE_CURRENT_BINARY_DIR}/LanaiGenMdlInfo.h" + "${CMAKE_CURRENT_BINARY_DIR}/LanaiGenMdlTarget.inc" + ) + + add_custom_command(OUTPUT ${MdlDatabaseLanai} + COMMAND mdl Lanai.mdl + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating LanaiGenMdlInfo.inc..." + DEPENDS TdScanLanai + VERBATIM) + + # tablegen macro + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${MdlDatabaseLanai}) + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/LanaiGenMdlInfo.inc") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/LanaiGenMdlInfo.h") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/LanaiGenMdlTarget.inc") + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${MdlDatabaseLanai} PROPERTIES GENERATED 1) + + # add_public_tablegen_target macro + add_custom_target(MdlLanai DEPENDS ${TABLEGEN_OUTPUT}) + + if(LLVM_COMMON_DEPENDS) + add_dependencies(MdlLanai ${LLVM_COMMON_DEPENDS}) + endif() + + set_target_properties(MdlLanai PROPERTIES FOLDER "MdlGeneration") + set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} MdlLanai) +endif() +############### End Mdl + add_llvm_target(LanaiCodeGen LanaiAsmPrinter.cpp LanaiDelaySlotFiller.cpp diff --git a/llvm/lib/Target/Lanai/LanaiSubtarget.cpp b/llvm/lib/Target/Lanai/LanaiSubtarget.cpp --- a/llvm/lib/Target/Lanai/LanaiSubtarget.cpp +++ b/llvm/lib/Target/Lanai/LanaiSubtarget.cpp @@ -13,6 +13,7 @@ #include "LanaiSubtarget.h" #include "Lanai.h" +#include "llvm/Config/config.h" #define DEBUG_TYPE "lanai-subtarget" @@ -20,6 +21,16 @@ #define GET_SUBTARGETINFO_CTOR #include "LanaiGenSubtargetInfo.inc" +// Include definitions associated with the MDL description. +#if ENABLE_MDL_USE +#include "LanaiGenMdlInfo.h" +// Include virtual predicate function definitions from the MDL description. 
+#include "LanaiGenMdlTarget.inc" +#define LanaiCpuTable &Lanai::CpuTable +#else +#define LanaiCpuTable nullptr +#endif + using namespace llvm; void LanaiSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -41,6 +52,13 @@ const TargetOptions & /*Options*/, CodeModel::Model /*CodeModel*/, CodeGenOpt::Level /*OptLevel*/) - : LanaiGenSubtargetInfo(TargetTriple, Cpu, /*TuneCPU*/ Cpu, FeatureString), + : LanaiGenSubtargetInfo(TargetTriple, Cpu, /*TuneCPU*/ Cpu, FeatureString, + LanaiCpuTable), FrameLowering(initializeSubtargetDependencies(Cpu, FeatureString)), - TLInfo(TM, *this) {} + TLInfo(TM, *this) { + + // Register the Target-library-specific predicate table in the cpu table. +#if ENABLE_MDL_USE + Lanai::CpuTable.SetInstrPredicates(&Lanai::InstrPredicates); +#endif +} diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp @@ -15,6 +15,7 @@ #include "LanaiMCAsmInfo.h" #include "TargetInfo/LanaiTargetInfo.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Config/config.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" @@ -37,6 +38,14 @@ #define GET_REGINFO_MC_DESC #include "LanaiGenRegisterInfo.inc" +// Include the generated MDL database. +#if ENABLE_MDL_USE +#include "LanaiGenMdlInfo.inc" +#define LanaiCpuTable &Lanai::CpuTable +#else +#define LanaiCpuTable nullptr +#endif + using namespace llvm; static MCInstrInfo *createLanaiMCInstrInfo() { @@ -57,7 +66,8 @@ if (CPUName.empty()) CPUName = "generic"; - return createLanaiMCSubtargetInfoImpl(TT, CPUName, /*TuneCPU*/ CPUName, FS); + return createLanaiMCSubtargetInfoImpl(TT, CPUName, /*TuneCPU*/ CPUName, FS, + LanaiCpuTable); } static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp @@ -85,7 +85,7 @@ StringRef TuneCPU, StringRef FS, StringRef ABIName, const TargetMachine &TM) - : LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS), + : LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS, nullptr), FrameLowering( initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)), InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {} diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -56,7 +56,8 @@ createLoongArchMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { if (CPU.empty() || CPU == "generic") CPU = TT.isArch64Bit() ? 
"la464" : "generic-la32"; - return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, + nullptr); } static MCAsmInfo *createLoongArchMCAsmInfo(const MCRegisterInfo &MRI, diff --git a/llvm/lib/Target/M68k/M68kSubtarget.cpp b/llvm/lib/Target/M68k/M68kSubtarget.cpp --- a/llvm/lib/Target/M68k/M68kSubtarget.cpp +++ b/llvm/lib/Target/M68k/M68kSubtarget.cpp @@ -50,8 +50,8 @@ M68kSubtarget::M68kSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const M68kTargetMachine &TM) - : M68kGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), TM(TM), TSInfo(), - InstrInfo(initializeSubtargetDependencies(CPU, TT, FS, TM)), + : M68kGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS, nullptr), TM(TM), + TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, TT, FS, TM)), FrameLowering(*this, this->getStackAlignment()), TLInfo(TM, *this), TargetTriple(TT) { CallLoweringInfo.reset(new M68kCallLowering(*getTargetLowering())); diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp @@ -67,7 +67,8 @@ ArchFS = FS.str(); } } - return createM68kMCSubtargetInfoImpl(TT, CPU, /*TuneCPU=*/CPU, ArchFS); + return createM68kMCSubtargetInfoImpl(TT, CPU, /*TuneCPU=*/CPU, ArchFS, + nullptr); } static MCAsmInfo *createM68kMCAsmInfo(const MCRegisterInfo &MRI, diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -67,7 +67,7 @@ static MCSubtargetInfo * createMSP430MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - return createMSP430MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createMSP430MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, nullptr); } static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp --- a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp @@ -57,6 +57,6 @@ MSP430Subtarget::MSP430Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : MSP430GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), + : MSP430GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS, nullptr), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), FrameLowering(*this) {} diff --git a/llvm/lib/Target/Mips/CMakeLists.txt b/llvm/lib/Target/Mips/CMakeLists.txt --- a/llvm/lib/Target/Mips/CMakeLists.txt +++ b/llvm/lib/Target/Mips/CMakeLists.txt @@ -18,9 +18,51 @@ tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info) tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM MipsGenExegesis.inc -gen-exegesis) +tablegen(LLVM Mips.txt -print-records) add_public_tablegen_target(MipsCommonTableGen) +############### Begin Mdl +if(LLVM_ENABLE_MDL) + add_custom_target(TdScanMips + COMMAND tdscan -gen_arch_spec --nowarnings Mips.txt + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating Mips.mdl..." 
+ DEPENDS MipsCommonTableGen + VERBATIM) + + set(MdlDatabaseMips + "${CMAKE_CURRENT_BINARY_DIR}/MipsGenMdlInfo.inc" + "${CMAKE_CURRENT_BINARY_DIR}/MipsGenMdlInfo.h" + "${CMAKE_CURRENT_BINARY_DIR}/MipsGenMdlTarget.inc" + ) + + add_custom_command(OUTPUT ${MdlDatabaseMips} + COMMAND mdl Mips.mdl + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating MipsGenMdlInfo.inc..." + DEPENDS TdScanMips + VERBATIM) + + # tablegen macro + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${MdlDatabaseMips}) + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/MipsGenMdlInfo.inc") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/MipsGenMdlInfo.h") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/MipsGenMdlTarget.inc") + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${MdlDatabaseMips} PROPERTIES GENERATED 1) + + # add_public_tablegen_target macro + add_custom_target(MdlMips DEPENDS ${TABLEGEN_OUTPUT}) + + if(LLVM_COMMON_DEPENDS) + add_dependencies(MdlMips ${LLVM_COMMON_DEPENDS}) + endif() + + set_target_properties(MdlMips PROPERTIES FOLDER "MdlGeneration") + set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} MdlMips) +endif() +############### End Mdl + add_llvm_target(MipsCodeGen Mips16FrameLowering.cpp Mips16HardFloat.cpp diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -19,6 +19,7 @@ #include "MipsMCNaCl.h" #include "MipsTargetStreamer.h" #include "TargetInfo/MipsTargetInfo.h" +#include "llvm/Config/config.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInstrAnalysis.h" @@ -45,6 +46,14 @@ #define GET_REGINFO_MC_DESC #include "MipsGenRegisterInfo.inc" +// Include the generated MDL database. +#if ENABLE_MDL_USE +#include "MipsGenMdlInfo.inc" +#define MipsCpuTable &Mips::CpuTable +#else +#define MipsCpuTable nullptr +#endif + /// Select the Mips CPU for the given triple and cpu name. StringRef MIPS_MC::selectMipsCPU(const Triple &TT, StringRef CPU) { if (CPU.empty() || CPU == "generic") { @@ -78,7 +87,8 @@ static MCSubtargetInfo *createMipsMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { CPU = MIPS_MC::selectMipsCPU(TT, CPU); - return createMipsMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createMipsMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, + MipsCpuTable); } static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, diff --git a/llvm/lib/Target/Mips/MipsSubtarget.cpp b/llvm/lib/Target/Mips/MipsSubtarget.cpp --- a/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -18,6 +18,7 @@ #include "MipsRegisterBankInfo.h" #include "MipsRegisterInfo.h" #include "MipsTargetMachine.h" +#include "llvm/Config/config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/MC/TargetRegistry.h" @@ -33,6 +34,16 @@ #define GET_SUBTARGETINFO_CTOR #include "MipsGenSubtargetInfo.inc" +// Include definitions associated with the MDL description. +#if ENABLE_MDL_USE +#include "MipsGenMdlInfo.h" +// Include virtual predicate function definitions from the MDL description. 
+#include "MipsGenMdlTarget.inc" +#define MipsCpuTable &Mips::CpuTable +#else +#define MipsCpuTable nullptr +#endif + // FIXME: Maybe this should be on by default when Mips16 is specified // static cl::opt @@ -71,7 +82,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, bool little, const MipsTargetMachine &TM, MaybeAlign StackAlignOverride) - : MipsGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), + : MipsGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS, MipsCpuTable), MipsArchVersion(MipsDefault), IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false), NoABICalls(false), Abs2008(false), IsFP64bit(false), UseOddSPReg(true), IsNaN2008bit(false), @@ -79,13 +90,14 @@ HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false), InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), - HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 || Mips_Os16), - Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false), - HasEVA(false), DisableMadd4(false), HasMT(false), HasCRC(false), - HasVirt(false), HasGINV(false), UseIndirectJumpsHazard(false), - StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT), - TSInfo(), InstrInfo(MipsInstrInfo::create( - initializeSubtargetDependencies(CPU, FS, TM))), + HasDSPR2(false), HasDSPR3(false), + AllowMixed16_32(Mixed16_32 || Mips_Os16), Os16(Mips_Os16), HasMSA(false), + UseTCCInDIV(false), HasSym32(false), HasEVA(false), DisableMadd4(false), + HasMT(false), HasCRC(false), HasVirt(false), HasGINV(false), + UseIndirectJumpsHazard(false), StackAlignOverride(StackAlignOverride), + TM(TM), TargetTriple(TT), TSInfo(), + InstrInfo( + MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), TLInfo(MipsTargetLowering::create(TM, *this)) { @@ -219,6 +231,11 @@ RegBankInfo.reset(RBI); InstSelector.reset(createMipsInstructionSelector( *static_cast(&TM), *this, *RBI)); + + // Register the Target-library-specific predicate table in the cpu table. 
+#if ENABLE_MDL_USE + Mips::CpuTable.SetInstrPredicates(&Mips::InstrPredicates); +#endif } bool MipsSubtarget::isPositionIndependent() const { diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -47,7 +47,7 @@ static MCSubtargetInfo * createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - return createNVPTXMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createNVPTXMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, nullptr); } static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -47,8 +47,8 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const NVPTXTargetMachine &TM) - : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0), - SmVersion(20), TM(TM), + : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS, nullptr), + PTXVersion(0), SmVersion(20), TM(TM), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {} bool NVPTXSubtarget::hasImageHandles() const { diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -15,9 +15,51 @@ tablegen(LLVM PPCGenExegesis.inc -gen-exegesis) tablegen(LLVM PPCGenRegisterBank.inc -gen-register-bank) tablegen(LLVM PPCGenGlobalISel.inc -gen-global-isel) +tablegen(LLVM PPC.txt -print-records) add_public_tablegen_target(PowerPCCommonTableGen) +############### Begin Mdl +if(LLVM_ENABLE_MDL) + add_custom_target(TdScanPPC + COMMAND tdscan -gen_arch_spec --nowarnings PPC.txt + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating PPC.mdl..." + DEPENDS PowerPCCommonTableGen + VERBATIM) + + set(MdlDatabasePPC + "${CMAKE_CURRENT_BINARY_DIR}/PPCGenMdlInfo.inc" + "${CMAKE_CURRENT_BINARY_DIR}/PPCGenMdlInfo.h" + "${CMAKE_CURRENT_BINARY_DIR}/PPCGenMdlTarget.inc" + ) + + add_custom_command(OUTPUT ${MdlDatabasePPC} + COMMAND mdl PPC.mdl + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating PPCGenMdlInfo.inc..." 
+ DEPENDS TdScanPPC + VERBATIM) + + # tablegen macro + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${MdlDatabasePPC}) + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/PPCGenMdlInfo.inc") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/PPCGenMdlInfo.h") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/PPCGenMdlTarget.inc") + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${MdlDatabasePPC} PROPERTIES GENERATED 1) + + # add_public_tablegen_target macro + add_custom_target(MdlPPC DEPENDS ${TABLEGEN_OUTPUT}) + + if(LLVM_COMMON_DEPENDS) + add_dependencies(MdlPPC ${LLVM_COMMON_DEPENDS}) + endif() + + set_target_properties(MdlPPC PROPERTIES FOLDER "MdlGeneration") + set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} MdlPPC) +endif() +############### End Mdl + add_llvm_target(PowerPCCodeGen GISel/PPCInstructionSelector.cpp PPCBoolRetToInt.cpp diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/Config/config.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -57,6 +58,14 @@ #define GET_REGINFO_MC_DESC #include "PPCGenRegisterInfo.inc" +// Include the generated MDL database. +#if ENABLE_MDL_USE +#include "PPCGenMdlInfo.inc" +#define PPCCpuTable &PPC::CpuTable +#else +#define PPCCpuTable nullptr +#endif + PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} // Pin the vtable to this file. @@ -91,7 +100,8 @@ FullFS = "+aix"; } - return createPPCMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FullFS); + return createPPCMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FullFS, + PPCCpuTable); } static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/Config/config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -37,6 +38,16 @@ #define GET_SUBTARGETINFO_CTOR #include "PPCGenSubtargetInfo.inc" +// Include definitions associated with the MDL description. +#if ENABLE_MDL_USE +#include "PPCGenMdlInfo.h" +// Include virtual predicate function definitions from the MDL description. 
+#include "PPCGenMdlTarget.inc" +#define PPCCpuTable &PPC::CpuTable +#else +#define PPCCpuTable nullptr +#endif + static cl::opt UseSubRegLiveness("ppc-track-subreg-liveness", cl::desc("Enable subregister liveness tracking for PPC"), @@ -58,7 +69,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, const std::string &TuneCPU, const std::string &FS, const PPCTargetMachine &TM) - : PPCGenSubtargetInfo(TT, CPU, TuneCPU, FS), TargetTriple(TT), + : PPCGenSubtargetInfo(TT, CPU, TuneCPU, FS, PPCCpuTable), TargetTriple(TT), IsPPC64(TargetTriple.getArch() == Triple::ppc64 || TargetTriple.getArch() == Triple::ppc64le), TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, TuneCPU, FS)), @@ -70,6 +81,11 @@ InstSelector.reset(createPPCInstructionSelector( *static_cast(&TM), *this, *RBI)); + + // Register the Target-library-specific predicate table in the cpu table. +#if ENABLE_MDL_USE + PPC::CpuTable.SetInstrPredicates(&PPC::InstrPredicates); +#endif } void PPCSubtarget::initializeEnvironment() { diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -15,9 +15,51 @@ tablegen(LLVM RISCVGenRegisterInfo.inc -gen-register-info) tablegen(LLVM RISCVGenSearchableTables.inc -gen-searchable-tables) tablegen(LLVM RISCVGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM RISCV.txt -print-records) add_public_tablegen_target(RISCVCommonTableGen) +############### Begin Mdl +if(LLVM_ENABLE_MDL) + add_custom_target(TdScanRISCV + COMMAND tdscan -gen_arch_spec --nowarnings RISCV.txt + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating RISCV.mdl..." + DEPENDS RISCVCommonTableGen + VERBATIM) + + set(MdlDatabaseRISCV + "${CMAKE_CURRENT_BINARY_DIR}/RISCVGenMdlInfo.inc" + "${CMAKE_CURRENT_BINARY_DIR}/RISCVGenMdlInfo.h" + "${CMAKE_CURRENT_BINARY_DIR}/RISCVGenMdlTarget.inc" + ) + + add_custom_command(OUTPUT ${MdlDatabaseRISCV} + COMMAND mdl RISCV.mdl + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating RISCVGenMdlInfo.inc..." 
+ DEPENDS TdScanRISCV + VERBATIM) + + # tablegen macro + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${MdlDatabaseRISCV}) + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/RISCVGenMdlInfo.inc") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/RISCVGenMdlInfo.h") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/RISCVGenMdlTarget.inc") + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${MdlDatabaseRISCV} PROPERTIES GENERATED 1) + + # add_public_tablegen_target macro + add_custom_target(MdlRISCV DEPENDS ${TABLEGEN_OUTPUT}) + + if(LLVM_COMMON_DEPENDS) + add_dependencies(MdlRISCV ${LLVM_COMMON_DEPENDS}) + endif() + + set_target_properties(MdlRISCV PROPERTIES FOLDER "MdlGeneration") + set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} MdlRISCV) +endif() +############### End Mdl + add_llvm_target(RISCVCodeGen RISCVAsmPrinter.cpp RISCVCodeGenPrepare.cpp diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -19,6 +19,7 @@ #include "RISCVTargetStreamer.h" #include "TargetInfo/RISCVTargetInfo.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Config/config.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" @@ -42,6 +43,14 @@ #define GET_SUBTARGETINFO_MC_DESC #include "RISCVGenSubtargetInfo.inc" +// Include the generated MDL database. +#if ENABLE_MDL_USE +#include "RISCVGenMdlInfo.inc" +#define RISCVCpuTable &RISCV::CpuTable +#else +#define RISCVCpuTable nullptr +#endif + using namespace llvm; static MCInstrInfo *createRISCVMCInstrInfo() { @@ -81,7 +90,8 @@ if (CPU.empty() || CPU == "generic") CPU = TT.isArch64Bit() ? "generic-rv64" : "generic-rv32"; - return createRISCVMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createRISCVMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, + RISCVCpuTable); } static MCInstPrinter *createRISCVMCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -18,6 +18,7 @@ #include "RISCVFrameLowering.h" #include "RISCVMacroFusion.h" #include "RISCVTargetMachine.h" +#include "llvm/Config/config.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" @@ -29,6 +30,16 @@ #define GET_SUBTARGETINFO_CTOR #include "RISCVGenSubtargetInfo.inc" +// Include definitions associated with the MDL description. +#if ENABLE_MDL_USE +#include "RISCVGenMdlInfo.h" +// Include virtual predicate function definitions from the MDL description. 
+#include "RISCVGenMdlTarget.inc" +#define RISCVCpuTable &RISCV::CpuTable +#else +#define RISCVCpuTable nullptr +#endif + static cl::opt EnableSubRegLiveness("riscv-enable-subreg-liveness", cl::init(true), cl::Hidden); @@ -81,7 +92,7 @@ StringRef ABIName, unsigned RVVVectorBitsMin, unsigned RVVVectorBitsMax, const TargetMachine &TM) - : RISCVGenSubtargetInfo(TT, CPU, TuneCPU, FS), + : RISCVGenSubtargetInfo(TT, CPU, TuneCPU, FS, RISCVCpuTable), RVVVectorBitsMin(RVVVectorBitsMin), RVVVectorBitsMax(RVVVectorBitsMax), FrameLowering( initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)), @@ -93,6 +104,11 @@ RegBankInfo.reset(RBI); InstSelector.reset(createRISCVInstructionSelector( *static_cast(&TM), *this, *RBI)); + + // Register the Target-library-specific predicate table in the cpu table. +#if ENABLE_MDL_USE + RISCV::CpuTable.SetInstrPredicates(&RISCV::InstrPredicates); +#endif } const CallLowering *RISCVSubtarget::getCallLowering() const { diff --git a/llvm/lib/Target/Sparc/CMakeLists.txt b/llvm/lib/Target/Sparc/CMakeLists.txt --- a/llvm/lib/Target/Sparc/CMakeLists.txt +++ b/llvm/lib/Target/Sparc/CMakeLists.txt @@ -11,9 +11,51 @@ tablegen(LLVM SparcGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM SparcGenRegisterInfo.inc -gen-register-info) tablegen(LLVM SparcGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM Sparc.txt -print-records) add_public_tablegen_target(SparcCommonTableGen) +############### Begin Mdl +if(LLVM_ENABLE_MDL) + add_custom_target(TdScanSparc + COMMAND tdscan --family_name=SP -gen_arch_spec --nowarnings Sparc.txt + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating Sparc.mdl..." + DEPENDS SparcCommonTableGen + VERBATIM) + + set(MdlDatabaseSparc + "${CMAKE_CURRENT_BINARY_DIR}/SparcGenMdlInfo.inc" + "${CMAKE_CURRENT_BINARY_DIR}/SparcGenMdlInfo.h" + "${CMAKE_CURRENT_BINARY_DIR}/SparcGenMdlTarget.inc" + ) + + add_custom_command(OUTPUT ${MdlDatabaseSparc} + COMMAND mdl Sparc.mdl + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating SparcGenMdlInfo.inc..." 
+ DEPENDS TdScanSparc + VERBATIM) + + # tablegen macro + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${MdlDatabaseSparc}) + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/SparcGenMdlInfo.inc") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/SparcGenMdlInfo.h") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/SparcGenMdlTarget.inc") + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${MdlDatabaseSparc} PROPERTIES GENERATED 1) + + # add_public_tablegen_target macro + add_custom_target(MdlSparc DEPENDS ${TABLEGEN_OUTPUT}) + + if(LLVM_COMMON_DEPENDS) + add_dependencies(MdlSparc ${LLVM_COMMON_DEPENDS}) + endif() + + set_target_properties(MdlSparc PROPERTIES FOLDER "MdlGeneration") + set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} MdlSparc) +endif() +############### End Mdl + add_llvm_target(SparcCodeGen DelaySlotFiller.cpp LeonPasses.cpp diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -15,6 +15,7 @@ #include "SparcMCAsmInfo.h" #include "SparcTargetStreamer.h" #include "TargetInfo/SparcTargetInfo.h" +#include "llvm/Config/config.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -33,6 +34,14 @@ #define GET_REGINFO_MC_DESC #include "SparcGenRegisterInfo.inc" +// Include the generated MDL database. +#if ENABLE_MDL_USE +#include "SparcGenMdlInfo.inc" +#define SparcCpuTable &SP::CpuTable +#else +#define SparcCpuTable nullptr +#endif + static MCAsmInfo *createSparcMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TT, const MCTargetOptions &Options) { @@ -69,7 +78,8 @@ createSparcMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { if (CPU.empty()) CPU = (TT.getArch() == Triple::sparcv9) ? "v9" : "v8"; - return createSparcMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createSparcMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, + SparcCpuTable); } static MCTargetStreamer * diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/llvm/lib/Target/Sparc/SparcSubtarget.cpp --- a/llvm/lib/Target/Sparc/SparcSubtarget.cpp +++ b/llvm/lib/Target/Sparc/SparcSubtarget.cpp @@ -12,6 +12,7 @@ #include "SparcSubtarget.h" #include "Sparc.h" +#include "llvm/Config/config.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/MathExtras.h" @@ -23,6 +24,16 @@ #define GET_SUBTARGETINFO_CTOR #include "SparcGenSubtargetInfo.inc" +// Include definitions associated with the MDL description. +#if ENABLE_MDL_USE +#include "SparcGenMdlInfo.h" +// Include virtual predicate function definitions from the MDL description. 
+#include "SparcGenMdlTarget.inc" +#define SparcCpuTable &SP::CpuTable +#else +#define SparcCpuTable nullptr +#endif + void SparcSubtarget::anchor() { } SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU, @@ -45,9 +56,17 @@ SparcSubtarget::SparcSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM, bool is64Bit) - : SparcGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), TargetTriple(TT), - Is64Bit(is64Bit), InstrInfo(initializeSubtargetDependencies(CPU, FS)), - TLInfo(TM, *this), FrameLowering(*this) {} + : SparcGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS, SparcCpuTable), + TargetTriple(TT), Is64Bit(is64Bit), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), + FrameLowering(*this) { + // Register the Target-library-specific predicate table in the cpu table. + // This table is only accessible if the Target library is included in an + // application. +#if ENABLE_MDL_USE + SP::CpuTable.SetInstrPredicates(&SP::InstrPredicates); +#endif +} int SparcSubtarget::getAdjustedFrameSize(int frameSize) const { diff --git a/llvm/lib/Target/SystemZ/CMakeLists.txt b/llvm/lib/Target/SystemZ/CMakeLists.txt --- a/llvm/lib/Target/SystemZ/CMakeLists.txt +++ b/llvm/lib/Target/SystemZ/CMakeLists.txt @@ -11,9 +11,51 @@ tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info) tablegen(LLVM SystemZGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM SystemZ.txt -print-records) add_public_tablegen_target(SystemZCommonTableGen) +############### Begin Mdl +if(LLVM_ENABLE_MDL) + add_custom_target(TdScanSystemZ + COMMAND tdscan -gen_arch_spec --nowarnings SystemZ.txt + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating SystemZ.mdl..." + DEPENDS SystemZCommonTableGen + VERBATIM) + + set(MdlDatabaseSystemZ + "${CMAKE_CURRENT_BINARY_DIR}/SystemZGenMdlInfo.inc" + "${CMAKE_CURRENT_BINARY_DIR}/SystemZGenMdlInfo.h" + "${CMAKE_CURRENT_BINARY_DIR}/SystemZGenMdlTarget.inc" + ) + + add_custom_command(OUTPUT ${MdlDatabaseSystemZ} + COMMAND mdl SystemZ.mdl + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating SystemZGenMdlInfo.inc..." 
+ DEPENDS TdScanSystemZ + VERBATIM) + + # tablegen macro + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${MdlDatabaseSystemZ}) + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/SystemZGenMdlInfo.inc") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/SystemZGenMdlInfo.h") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/SystemZGenMdlTarget.inc") + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${MdlDatabaseSystemZ} PROPERTIES GENERATED 1) + + # add_public_tablegen_target macro + add_custom_target(MdlSystemZ DEPENDS ${TABLEGEN_OUTPUT}) + + if(LLVM_COMMON_DEPENDS) + add_dependencies(MdlSystemZ ${LLVM_COMMON_DEPENDS}) + endif() + + set_target_properties(MdlSystemZ PROPERTIES FOLDER "MdlGeneration") + set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} MdlSystemZ) +endif() +############### End Mdl + add_llvm_target(SystemZCodeGen SystemZAsmPrinter.cpp SystemZCallingConv.cpp diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -11,6 +11,7 @@ #include "SystemZMCAsmInfo.h" #include "SystemZTargetStreamer.h" #include "TargetInfo/SystemZTargetInfo.h" +#include "llvm/Config/config.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInst.h" @@ -32,6 +33,14 @@ #define GET_REGINFO_MC_DESC #include "SystemZGenRegisterInfo.inc" +// Include the generated MDL database. +#if ENABLE_MDL_USE +#include "SystemZGenMdlInfo.inc" +#define SystemZCpuTable &SystemZ::CpuTable +#else +#define SystemZCpuTable nullptr +#endif + const unsigned SystemZMC::GR32Regs[16] = { SystemZ::R0L, SystemZ::R1L, SystemZ::R2L, SystemZ::R3L, SystemZ::R4L, SystemZ::R5L, SystemZ::R6L, SystemZ::R7L, @@ -178,7 +187,8 @@ static MCSubtargetInfo * createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - return createSystemZMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createSystemZMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, + SystemZCpuTable); } static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp --- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -187,7 +187,7 @@ FU = "LSU"; OS << "/" << FU; - if (PI->ReleaseAtCycle> 1) + if (PI->ReleaseAtCycle > 1) OS << "(" << PI->ReleaseAtCycle << "cyc)"; } @@ -266,6 +266,8 @@ MI->getOpcode() == SystemZ::CondTrap); } +// TODO-MDL - Write an MDL-specific version of this. + // Update state with SU as the next scheduled unit. void SystemZHazardRecognizer:: EmitInstruction(SUnit *SU) { @@ -383,6 +385,7 @@ return ((SUCycleIdx - LastFPdOpCycleIdx) == 3); } +// TODO-MDL - Write an MDL-specific version of this. int SystemZHazardRecognizer:: resourcesCost(SUnit *SU) { int Cost = 0; @@ -407,6 +410,7 @@ return Cost; } +// TODO-MDL - Write an MDL-specific version of this. void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, bool TakenBranch) { // Make a temporary SUnit. 
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -9,6 +9,7 @@ #include "SystemZSubtarget.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Config/config.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Target/TargetMachine.h" @@ -20,6 +21,16 @@ #define GET_SUBTARGETINFO_CTOR #include "SystemZGenSubtargetInfo.inc" +// Include definitions associated with the MDL description. +#if ENABLE_MDL_USE +#include "SystemZGenMdlInfo.h" +// Include virtual predicate function definitions from the MDL description. +#include "SystemZGenMdlTarget.inc" +#define SystemZCpuTable &SystemZ::CpuTable +#else +#define SystemZCpuTable nullptr +#endif + static cl::opt UseSubRegLiveness( "systemz-subreg-liveness", cl::desc("Enable subregister liveness tracking for SystemZ (experimental)"), @@ -69,10 +80,16 @@ const std::string &TuneCPU, const std::string &FS, const TargetMachine &TM) - : SystemZGenSubtargetInfo(TT, CPU, TuneCPU, FS), TargetTriple(TT), - SpecialRegisters(initializeSpecialRegisters()), + : SystemZGenSubtargetInfo(TT, CPU, TuneCPU, FS, SystemZCpuTable), + TargetTriple(TT), SpecialRegisters(initializeSpecialRegisters()), InstrInfo(initializeSubtargetDependencies(CPU, TuneCPU, FS)), - TLInfo(TM, *this), FrameLowering(SystemZFrameLowering::create(*this)) {} + TLInfo(TM, *this), FrameLowering(SystemZFrameLowering::create(*this)) { + + // Register the Target-library-specific predicate table in the cpu table. +#if ENABLE_MDL_USE + SystemZ::CpuTable.SetInstrPredicates(&SystemZ::InstrPredicates); +#endif +} bool SystemZSubtarget::enableSubRegLiveness() const { return UseSubRegLiveness; diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp @@ -58,7 +58,7 @@ StringRef FS) { if (CPU.empty()) CPU = "generic"; - return createVEMCSubtargetInfoImpl(TT, CPU, /*TuneCPU=*/CPU, FS); + return createVEMCSubtargetInfoImpl(TT, CPU, /*TuneCPU=*/CPU, FS, nullptr); } static MCTargetStreamer * diff --git a/llvm/lib/Target/VE/VESubtarget.cpp b/llvm/lib/Target/VE/VESubtarget.cpp --- a/llvm/lib/Target/VE/VESubtarget.cpp +++ b/llvm/lib/Target/VE/VESubtarget.cpp @@ -43,9 +43,9 @@ VESubtarget::VESubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : VEGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), TargetTriple(TT), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - FrameLowering(*this) {} + : VEGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS, nullptr), + TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), + TLInfo(TM, *this), FrameLowering(*this) {} uint64_t VESubtarget::getAdjustedFrameSize(uint64_t FrameSize) const { // Calculate adjusted frame size by adding the size of RSA frame, diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -96,7 +96,8 @@ static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - return createWebAssemblyMCSubtargetInfoImpl(TT, 
CPU, /*TuneCPU*/ CPU, FS); + return createWebAssemblyMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, + nullptr); } static MCTargetStreamer * diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -41,7 +41,7 @@ const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : WebAssemblyGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), + : WebAssemblyGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS, nullptr), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {} diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -18,10 +18,52 @@ tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank) tablegen(LLVM X86GenRegisterInfo.inc -gen-register-info) tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM X86.txt -print-records) tablegen(LLVM X86GenFoldTables.inc -gen-x86-fold-tables -asmwriternum=1) add_public_tablegen_target(X86CommonTableGen) +############### Begin Mdl +if(LLVM_ENABLE_MDL) + add_custom_target(TdScanX86 + COMMAND tdscan -gen_arch_spec --nowarnings X86.txt + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating X86.mdl..." + DEPENDS X86CommonTableGen + VERBATIM) + + set(MdlDatabaseX86 + "${CMAKE_CURRENT_BINARY_DIR}/X86GenMdlInfo.inc" + "${CMAKE_CURRENT_BINARY_DIR}/X86GenMdlInfo.h" + "${CMAKE_CURRENT_BINARY_DIR}/X86GenMdlTarget.inc" + ) + + add_custom_command(OUTPUT ${MdlDatabaseX86} + COMMAND mdl X86.mdl + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating X86GenMdlInfo.inc..." + DEPENDS TdScanX86 + VERBATIM) + + # tablegen macro + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${MdlDatabaseX86}) + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/X86GenMdlInfo.inc") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/X86GenMdlInfo.h") + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} "${CMAKE_CURRENT_BINARY_DIR}/X86GenMdlTarget.inc") + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${MdlDatabaseX86} PROPERTIES GENERATED 1) + + # add_public_tablegen_target macro + add_custom_target(MdlX86 DEPENDS ${TABLEGEN_OUTPUT}) + + if(LLVM_COMMON_DEPENDS) + add_dependencies(MdlX86 ${LLVM_COMMON_DEPENDS}) + endif() + + set_target_properties(MdlX86 PROPERTIES FOLDER "MdlGeneration") + set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} MdlX86) +endif() +############### End Mdl + set(sources X86ArgumentStackSlotRebase.cpp X86AsmPrinter.cpp diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -18,6 +18,7 @@ #include "X86MCAsmInfo.h" #include "X86TargetStreamer.h" #include "llvm/ADT/APInt.h" +#include "llvm/Config/config.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInstrAnalysis.h" @@ -36,6 +37,14 @@ #define GET_REGINFO_MC_DESC #include "X86GenRegisterInfo.inc" +// Include the generated MDL database. 
+#if ENABLE_MDL_USE +#include "X86GenMdlInfo.inc" +#define X86CpuTable &X86::CpuTable +#else +#define X86CpuTable nullptr +#endif + #define GET_INSTRINFO_MC_DESC #define GET_INSTRINFO_MC_HELPERS #define ENABLE_INSTR_PREDICATE_VERIFIER @@ -397,7 +406,8 @@ if (CPU.empty()) CPU = "generic"; - return createX86MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, ArchFS); + return createX86MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, ArchFS, + X86CpuTable); } static MCInstrInfo *createX86MCInstrInfo() { diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/Config/config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" @@ -47,6 +48,16 @@ #define GET_SUBTARGETINFO_CTOR #include "X86GenSubtargetInfo.inc" +// Include definitions associated with the MDL description. +#if ENABLE_MDL_USE +#include "X86GenMdlInfo.h" +// Include virtual predicate function definitions from the MDL description. +#include "X86GenMdlTarget.inc" +#define X86CpuTable &X86::CpuTable +#else +#define X86CpuTable nullptr +#endif + // Temporary option to control early if-conversion for x86 while adding machine // models. static cl::opt @@ -315,7 +326,7 @@ MaybeAlign StackAlignOverride, unsigned PreferVectorWidthOverride, unsigned RequiredVectorWidth) - : X86GenSubtargetInfo(TT, CPU, TuneCPU, FS), + : X86GenSubtargetInfo(TT, CPU, TuneCPU, FS, X86CpuTable), PICStyle(PICStyles::Style::None), TM(TM), TargetTriple(TT), StackAlignOverride(StackAlignOverride), PreferVectorWidthOverride(PreferVectorWidthOverride), @@ -340,6 +351,11 @@ auto *RBI = new X86RegisterBankInfo(*getRegisterInfo()); RegBankInfo.reset(RBI); InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI)); + + // Register the Target-library-specific predicate table in the cpu table. 
+#if ENABLE_MDL_USE + X86::CpuTable.SetInstrPredicates(&X86::InstrPredicates); +#endif } const CallLowering *X86Subtarget::getCallLowering() const { diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp --- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -52,7 +52,7 @@ static MCSubtargetInfo * createXCoreMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - return createXCoreMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + return createXCoreMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS, nullptr); } static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI, diff --git a/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/llvm/lib/Target/XCore/XCoreSubtarget.cpp --- a/llvm/lib/Target/XCore/XCoreSubtarget.cpp +++ b/llvm/lib/Target/XCore/XCoreSubtarget.cpp @@ -26,5 +26,5 @@ XCoreSubtarget::XCoreSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : XCoreGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), FrameLowering(*this), - TLInfo(TM, *this) {} + : XCoreGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS, nullptr), + FrameLowering(*this), TLInfo(TM, *this) {} diff --git a/llvm/unittests/CodeGen/MFCommon.inc b/llvm/unittests/CodeGen/MFCommon.inc --- a/llvm/unittests/CodeGen/MFCommon.inc +++ b/llvm/unittests/CodeGen/MFCommon.inc @@ -76,7 +76,7 @@ public: BogusSubtarget(TargetMachine &TM) : TargetSubtargetInfo(Triple(""), "", "", "", {}, {}, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr), + nullptr, nullptr, nullptr, nullptr, nullptr), FL(), TL(TM) {} ~BogusSubtarget() override {} diff --git a/llvm/unittests/Target/AArch64/AArch64InstPrinterTest.cpp b/llvm/unittests/Target/AArch64/AArch64InstPrinterTest.cpp --- a/llvm/unittests/Target/AArch64/AArch64InstPrinterTest.cpp +++ b/llvm/unittests/Target/AArch64/AArch64InstPrinterTest.cpp @@ -39,7 +39,7 @@ MCSubtargetInfo STI(Triple(""), "", "", "", ArrayRef((SubtargetFeatureKV *)NULL, (size_t)0), ArrayRef((SubtargetSubTypeKV *)NULL, (size_t)0), NULL, - NULL, NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL, NULL); MCContext Ctx(Triple(""), &MAI, &MRI, &STI); MCInst MI; diff --git a/llvm/utils/MdlCompiler/CMakeLists.txt b/llvm/utils/MdlCompiler/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/CMakeLists.txt @@ -0,0 +1,63 @@ +cmake_minimum_required(VERSION 3.13.4) +project(mdl) + +list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) + +# required if linking to static library +add_definitions(-DANTLR4CPP_STATIC) + +# using /MD flag for antlr4_runtime (for Visual C++ compilers only) +set(ANTLR4_WITH_STATIC_CRT OFF) +# add external build for antlrcpp +include(ExternalAntlr4Cpp) +# add antrl4cpp artifacts to project environment +include_directories(${ANTLR4_INCLUDE_DIRS}) + +# We look in /usr/local/lib by default. Add more places to look for Antlr. +# set(ANTLR_PATHS ...) + +# Antlr versions prior to 4.10 work, but we prefer newer versions. +find_package(ANTLR 4.9 REQUIRED) +message(STATUS "Antlr version: " ${ANTLR_VERSION}) + +# Call macro to add grammar to your build dependencies. 
+antlr_target(MdlGrammarParser mdl.g4 PACKAGE mpact::mdl::generated + OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/libs/) + +# include generated files in project environment +include_directories(${ANTLR_MdlGrammarParser_OUTPUT_DIR}) + +## Add LLVM +#### find_package(LLVM REQUIRED CONFIG) +#### message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") +#### message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") + +#### include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) +#### separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) +#### add_definitions(${LLVM_DEFINITIONS_LIST}) + +# Find the libraries that correspond to the LLVM components +llvm_map_components_to_libnames(llvm_libs support) + +# We're going to compile a few components from LLVMSupport with RTTI. So find +# the source directory. +get_target_property(support_sources LLVMSupport SOURCES) +get_target_property(support_dir LLVMSupport SOURCE_DIR) + +# add generated grammar to mdl binary target +add_executable(mdl mdl_main.cpp mdl.cpp mdl.h mdl_dump.cpp + mdl_generate.cpp mdl_generate.h mdl_output.cpp mdl_output.h + mdl_predicate.cpp mdl_util.cpp mdl_visitor.cpp mdl_visitor.h + ${ANTLR_MdlGrammarParser_CXX_OUTPUTS} + ${support_dir}/CommandLine.cpp + ${support_dir}/FormatVariadic.cpp + ${support_dir}/Error.cpp) + +# we must compile Antlr and the mdl compiler with RTTI (because of Antlr) +set_property(TARGET mdl PROPERTY CXX_STANDARD 17) +set_target_properties(mdl PROPERTIES COMPILE_FLAGS "-frtti") +set_target_properties(antlr4_static PROPERTIES COMPILE_FLAGS "-frtti") + +# Link against LLVMSupport (compiled without RTTI) and Antlr libraries +target_link_libraries(mdl ${llvm_libs} antlr4_static) + diff --git a/llvm/utils/MdlCompiler/cmake/ExternalAntlr4Cpp.cmake b/llvm/utils/MdlCompiler/cmake/ExternalAntlr4Cpp.cmake new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/cmake/ExternalAntlr4Cpp.cmake @@ -0,0 +1,154 @@ +cmake_minimum_required(VERSION 3.7) + +include(ExternalProject) + +set(ANTLR4_ROOT ${CMAKE_CURRENT_BINARY_DIR}/antlr4_runtime/src/antlr4_runtime) +set(ANTLR4_INCLUDE_DIRS ${ANTLR4_ROOT}/runtime/Cpp/runtime/src) +set(ANTLR4_GIT_REPOSITORY https://github.com/antlr/antlr4.git) +if(NOT DEFINED ANTLR4_TAG) + # Set to branch name to keep library updated at the cost of needing to rebuild after 'clean' + # Set to commit hash to keep the build stable and does not need to rebuild after 'clean' + set(ANTLR4_TAG master) +endif() + +if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") + set(ANTLR4_OUTPUT_DIR ${ANTLR4_ROOT}/runtime/Cpp/dist/$(Configuration)) +elseif(${CMAKE_GENERATOR} MATCHES "Xcode.*") + set(ANTLR4_OUTPUT_DIR ${ANTLR4_ROOT}/runtime/Cpp/dist/$(CONFIGURATION)) +else() + set(ANTLR4_OUTPUT_DIR ${ANTLR4_ROOT}/runtime/Cpp/dist) +endif() + +if(MSVC) + set(ANTLR4_STATIC_LIBRARIES + ${ANTLR4_OUTPUT_DIR}/antlr4-runtime-static.lib) + set(ANTLR4_SHARED_LIBRARIES + ${ANTLR4_OUTPUT_DIR}/antlr4-runtime.lib) + set(ANTLR4_RUNTIME_LIBRARIES + ${ANTLR4_OUTPUT_DIR}/antlr4-runtime.dll) +else() + set(ANTLR4_STATIC_LIBRARIES + ${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.a) + if(MINGW) + set(ANTLR4_SHARED_LIBRARIES + ${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.dll.a) + set(ANTLR4_RUNTIME_LIBRARIES + ${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.dll) + elseif(CYGWIN) + set(ANTLR4_SHARED_LIBRARIES + ${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.dll.a) + set(ANTLR4_RUNTIME_LIBRARIES + ${ANTLR4_OUTPUT_DIR}/cygantlr4-runtime-4.9.1.dll) + elseif(APPLE) + set(ANTLR4_RUNTIME_LIBRARIES + ${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.dylib) + else() + 
set(ANTLR4_RUNTIME_LIBRARIES + ${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.so) + endif() +endif() + +if(${CMAKE_GENERATOR} MATCHES ".* Makefiles") + # This avoids + # 'warning: jobserver unavailable: using -j1. Add '+' to parent make rule.' + set(ANTLR4_BUILD_COMMAND $(MAKE)) +elseif(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") + set(ANTLR4_BUILD_COMMAND + ${CMAKE_COMMAND} + --build . + --config $(Configuration) + --target) +elseif(${CMAKE_GENERATOR} MATCHES "Xcode.*") + set(ANTLR4_BUILD_COMMAND + ${CMAKE_COMMAND} + --build . + --config $(CONFIGURATION) + --target) +else() + set(ANTLR4_BUILD_COMMAND + ${CMAKE_COMMAND} + --build . + --target) +endif() + +if(NOT DEFINED ANTLR4_WITH_STATIC_CRT) + set(ANTLR4_WITH_STATIC_CRT ON) +endif() + +if(ANTLR4_ZIP_REPOSITORY) + ExternalProject_Add( + antlr4_runtime + PREFIX antlr4_runtime + URL ${ANTLR4_ZIP_REPOSITORY} + DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR} + BUILD_COMMAND "" + BUILD_IN_SOURCE 1 + SOURCE_DIR ${ANTLR4_ROOT} + SOURCE_SUBDIR runtime/Cpp + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} + -DWITH_STATIC_CRT:BOOL=${ANTLR4_WITH_STATIC_CRT} + INSTALL_COMMAND "" + EXCLUDE_FROM_ALL 1) +else() + ExternalProject_Add( + antlr4_runtime + PREFIX antlr4_runtime + GIT_REPOSITORY ${ANTLR4_GIT_REPOSITORY} + GIT_TAG ${ANTLR4_TAG} + DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR} + BUILD_COMMAND "" + BUILD_IN_SOURCE 1 + SOURCE_DIR ${ANTLR4_ROOT} + SOURCE_SUBDIR runtime/Cpp + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} + -DWITH_STATIC_CRT:BOOL=${ANTLR4_WITH_STATIC_CRT} + INSTALL_COMMAND "" + EXCLUDE_FROM_ALL 1) +endif() + +# Seperate build step as rarely people want both +set(ANTLR4_BUILD_DIR ${ANTLR4_ROOT}) +if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14.0") + # CMake 3.14 builds in above's SOURCE_SUBDIR when BUILD_IN_SOURCE is true + set(ANTLR4_BUILD_DIR ${ANTLR4_ROOT}/runtime/Cpp) +endif() + +ExternalProject_Add_Step( + antlr4_runtime + build_static + COMMAND ${ANTLR4_BUILD_COMMAND} antlr4_static + # Depend on target instead of step (a custom command) + # to avoid running dependent steps concurrently + DEPENDS antlr4_runtime + BYPRODUCTS ${ANTLR4_STATIC_LIBRARIES} + EXCLUDE_FROM_MAIN 1 + WORKING_DIRECTORY ${ANTLR4_BUILD_DIR}) +ExternalProject_Add_StepTargets(antlr4_runtime build_static) + +add_library(antlr4_static STATIC IMPORTED) +add_dependencies(antlr4_static antlr4_runtime-build_static) +set_target_properties(antlr4_static PROPERTIES + IMPORTED_LOCATION ${ANTLR4_STATIC_LIBRARIES}) + +ExternalProject_Add_Step( + antlr4_runtime + build_shared + COMMAND ${ANTLR4_BUILD_COMMAND} antlr4_shared + # Depend on target instead of step (a custom command) + # to avoid running dependent steps concurrently + DEPENDS antlr4_runtime + BYPRODUCTS ${ANTLR4_SHARED_LIBRARIES} ${ANTLR4_RUNTIME_LIBRARIES} + EXCLUDE_FROM_MAIN 1 + WORKING_DIRECTORY ${ANTLR4_BUILD_DIR}) +ExternalProject_Add_StepTargets(antlr4_runtime build_shared) + +add_library(antlr4_shared SHARED IMPORTED) +add_dependencies(antlr4_shared antlr4_runtime-build_shared) +set_target_properties(antlr4_shared PROPERTIES + IMPORTED_LOCATION ${ANTLR4_RUNTIME_LIBRARIES}) +if(ANTLR4_SHARED_LIBRARIES) + set_target_properties(antlr4_shared PROPERTIES + IMPORTED_IMPLIB ${ANTLR4_SHARED_LIBRARIES}) +endif() diff --git a/llvm/utils/MdlCompiler/cmake/FindANTLR.cmake b/llvm/utils/MdlCompiler/cmake/FindANTLR.cmake new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/cmake/FindANTLR.cmake @@ -0,0 +1,133 @@ +find_package(Java QUIET COMPONENTS Runtime) + +set (ANTLR_NAMES + 
antlr-4.11.1-complete.jar + antlr-4.11.0-complete.jar + antlr-4.11-complete.jar + antlr-4.10.1-complete.jar + antlr-4.10-complete.jar) + +if(NOT ANTLR_EXECUTABLE) + find_program(ANTLR_EXECUTABLE NAMES ${ANTLR_NAMES} + PATHS /usr/local/lib ${ANTLR_PATHS}) +endif() + +if(ANTLR_EXECUTABLE AND Java_JAVA_EXECUTABLE) + execute_process( + COMMAND ${Java_JAVA_EXECUTABLE} -jar ${ANTLR_EXECUTABLE} + OUTPUT_VARIABLE ANTLR_COMMAND_OUTPUT + ERROR_VARIABLE ANTLR_COMMAND_ERROR + RESULT_VARIABLE ANTLR_COMMAND_RESULT + OUTPUT_STRIP_TRAILING_WHITESPACE) + + if(ANTLR_COMMAND_RESULT EQUAL 0) + string(REGEX MATCH "Version [0-9]+(\\.[0-9]+)*" + ANTLR_VERSION ${ANTLR_COMMAND_OUTPUT}) + string(REPLACE "Version " "" ANTLR_VERSION ${ANTLR_VERSION}) + else() + message( + SEND_ERROR + "Command '${Java_JAVA_EXECUTABLE} -jar ${ANTLR_EXECUTABLE}' " + "failed with the output '${ANTLR_COMMAND_ERROR}'") + endif() + + macro(ANTLR_TARGET Name InputFile) + set(ANTLR_OPTIONS LEXER PARSER LISTENER VISITOR) + set(ANTLR_ONE_VALUE_ARGS PACKAGE OUTPUT_DIRECTORY DEPENDS_ANTLR) + set(ANTLR_MULTI_VALUE_ARGS COMPILE_FLAGS DEPENDS) + + cmake_parse_arguments(ANTLR_TARGET + "${ANTLR_OPTIONS}" + "${ANTLR_ONE_VALUE_ARGS}" + "${ANTLR_MULTI_VALUE_ARGS}" + ${ARGN}) + + set(ANTLR_${Name}_INPUT ${InputFile}) + + get_filename_component(ANTLR_INPUT ${InputFile} NAME_WE) + + if(ANTLR_TARGET_OUTPUT_DIRECTORY) + set(ANTLR_${Name}_OUTPUT_DIR ${ANTLR_TARGET_OUTPUT_DIRECTORY}) + else() + set(ANTLR_${Name}_OUTPUT_DIR + ${CMAKE_CURRENT_BINARY_DIR}/antlr4cpp_generated_src/${ANTLR_INPUT}) + endif() + + unset(ANTLR_${Name}_CXX_OUTPUTS) + + if((ANTLR_TARGET_LEXER AND NOT ANTLR_TARGET_PARSER) OR + (ANTLR_TARGET_PARSER AND NOT ANTLR_TARGET_LEXER)) + list(APPEND ANTLR_${Name}_CXX_OUTPUTS + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.h + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.cpp) + set(ANTLR_${Name}_OUTPUTS + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.interp + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.tokens) + else() + list(APPEND ANTLR_${Name}_CXX_OUTPUTS + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.h + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.cpp + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Parser.h + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Parser.cpp) + list(APPEND ANTLR_${Name}_OUTPUTS + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.interp + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.tokens) + endif() + + if(ANTLR_TARGET_LISTENER) + list(APPEND ANTLR_${Name}_CXX_OUTPUTS + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseListener.h + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseListener.cpp + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Listener.h + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Listener.cpp) + list(APPEND ANTLR_TARGET_COMPILE_FLAGS -listener) + endif() + + if(ANTLR_TARGET_VISITOR) + list(APPEND ANTLR_${Name}_CXX_OUTPUTS + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseVisitor.h + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseVisitor.cpp + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Visitor.h + ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Visitor.cpp) + list(APPEND ANTLR_TARGET_COMPILE_FLAGS -visitor) + endif() + + if(ANTLR_TARGET_PACKAGE) + list(APPEND ANTLR_TARGET_COMPILE_FLAGS -package ${ANTLR_TARGET_PACKAGE}) + endif() + + list(APPEND ANTLR_${Name}_OUTPUTS ${ANTLR_${Name}_CXX_OUTPUTS}) + + if(ANTLR_TARGET_DEPENDS_ANTLR) + if(ANTLR_${ANTLR_TARGET_DEPENDS_ANTLR}_INPUT) + list(APPEND ANTLR_TARGET_DEPENDS + ${ANTLR_${ANTLR_TARGET_DEPENDS_ANTLR}_INPUT}) + list(APPEND ANTLR_TARGET_DEPENDS + 
${ANTLR_${ANTLR_TARGET_DEPENDS_ANTLR}_OUTPUTS}) + else() + message(SEND_ERROR + "ANTLR target '${ANTLR_TARGET_DEPENDS_ANTLR}' not found") + endif() + endif() + + add_custom_command( + OUTPUT ${ANTLR_${Name}_OUTPUTS} + COMMAND ${Java_JAVA_EXECUTABLE} -jar ${ANTLR_EXECUTABLE} + ${InputFile} + -o ${ANTLR_${Name}_OUTPUT_DIR} + -no-listener + -Dlanguage=Cpp + ${ANTLR_TARGET_COMPILE_FLAGS} + DEPENDS ${InputFile} + ${ANTLR_TARGET_DEPENDS} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Building ${Name} with ANTLR ${ANTLR_VERSION}") + endmacro(ANTLR_TARGET) + +endif(ANTLR_EXECUTABLE AND Java_JAVA_EXECUTABLE) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + ANTLR + REQUIRED_VARS ANTLR_EXECUTABLE Java_JAVA_EXECUTABLE + VERSION_VAR ANTLR_VERSION) diff --git a/llvm/utils/MdlCompiler/mdl.h b/llvm/utils/MdlCompiler/mdl.h new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/mdl.h @@ -0,0 +1,2807 @@ +//===- mdl.h - Definitions for organizing a machine description -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a set of class definitions that correspond to +// constructs in the parsed machine description language (MDL), and are +// used to collect and organize all the machine details from the Antlr +// parse tree, so that they are in a more convenient, accessible format. +// +//===----------------------------------------------------------------------===// + +#ifndef MDL_COMPILER_MDL_H_ +#define MDL_COMPILER_MDL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "antlr4-runtime.h" +#include "llvm/Support/FormatVariadic.h" + +namespace mpact { +namespace mdl { + +//---------------------------------------------------------------------------- +// Definitions of objects to hold components of the description. +//---------------------------------------------------------------------------- +class PipePhases; +class Identifier; +class PhaseName; +class RegisterDef; +class RegisterClass; +class RegisterClassRef; +class ResourceDef; +class ResourceRef; +class CpuInstance; +class ClusterInstance; +class FuncUnitInstance; +class ForwardStmt; +class SubUnitInstance; +class LatencyInstance; +class Params; +class FuncUnitTemplate; +class FuncUnitGroup; +class Connect; +class FuncUnitUse; +class SubUnitTemplate; +class LatencyTemplate; +class Reference; +class ConditionalRef; +class PhaseExpr; +class OperandRef; +class InstructionDef; +class OperandDef; +class OperandAttribute; +class PredValue; +class OperandDecl; +class PredExpr; + +//---------------------------------------------------------------------------- +// Containers for managing instantiation of CPUs, clusters, functional units, +// subunits, and latencies. +//---------------------------------------------------------------------------- +class MdlSpec; +class FuncUnitInstantiation; +class SubUnitInstantiation; +class LatencyInstantiation; + +// Descriptor of the overall compiler output state. +class OutputState; + +//---------------------------------------------------------------------------- +// This represents a map of all subunit instantiations. 
For each subunit +// template, we have a list of every instance of that subunit, and the +// context in which it was instantiated. +//---------------------------------------------------------------------------- +using SubUnitInstantiations = + std::map *>; + +//---------------------------------------------------------------------------- +// This represents a map of functional unit templates to client functional +// unit instances. We build one of these for each CPU. +//---------------------------------------------------------------------------- +using FuncUnitInstances = + std::map>; + +//---------------------------------------------------------------------------- +// This represents a map of functional unit instantiations for each cluster. +//---------------------------------------------------------------------------- +using FuncUnitInstantiations = std::vector; + +//---------------------------------------------------------------------------- +// Containers for collections of components. +//---------------------------------------------------------------------------- +using IdList = std::vector; +using RegisterDefList = std::vector; +using RegisterClassList = std::vector; +using PipeDefList = std::vector; +using PhaseNameList = std::vector; +using ResourceDefList = std::vector; +using ResourceRefList = std::vector; +using CpuList = std::vector; +using ParamsList = std::vector; +using FuncUnitInstList = std::vector; +using SubUnitInstList = std::vector; +using ForwardStmtList = std::vector; +using FuncUnitList = std::vector; +using FuncUnitGroupList = std::vector; +using FuncUnitUseSet = std::vector>; +using SubUnitList = std::vector; +using LatencyList = std::vector; +using LatencyInstList = std::vector; +using ClusterList = std::vector; +using ConnectList = std::vector; +using ReferenceList = std::vector; +using ConditionalRefList = std::vector; +using OperandRefList = std::vector; +using InstructionList = std::vector; +using OperandDefList = std::vector; +using OperandDeclList = std::vector; +using OperandAttributeList = std::vector; +using PredValueList = std::vector; + +using IdDict = std::map; +using CpuDict = std::map; +using FuncUnitDict = std::map; +using FuncUnitGroupDict = std::map; +using SubUnitDict = std::map; +using LatencyDict = std::map; +using OperandDict = std::map; +using InstructionDict = std::map; +using SubUnitInstrs = std::map; +using StringList = std::vector; + +using ResourceRefDict = std::map; +using ResourceDefDict = std::map; +using RegisterClassDict = std::map; +using RegisterClassRefDict = std::map; + +//---------------------------------------------------------------------------- +// Template for writing out vectors of pointers to objects. +//---------------------------------------------------------------------------- +template +std::ostream &PrintVec(std::ostream &out, const std::vector *v, + std::string head = "", std::string sep = "\n", + std::string end = "\n") { + if (!v) + return out; + out << head; + + for (auto *item : *v) { + out << *item; + if (item != v->back()) + out << sep; + } + return out << end; +} + +//---------------------------------------------------------------------------- +// Template function for stringizing vectors of pointers to objects. 
+//---------------------------------------------------------------------------- +template +std::string StringVec(const std::vector *v, std::string head = "", + std::string sep = "\n", std::string end = "\n") { + if (!v) + return ""; + std::string out = head; + + for (auto *item : *v) { + out += item->ToString(); + if (item != v->back()) + out += sep; + } + + return out + end; +} +//---------------------------------------------------------------------------- +// Template function for stringizing sets of strings. +//---------------------------------------------------------------------------- +template +std::string StringSet(const std::set *v, std::string head = "", + std::string sep = "\n", std::string end = "\n") { + if (!v) + return ""; + std::string out = head; + + for (auto &item : *v) { + out += item; + if (item != *v->rbegin()) + out += sep; + } + + return out + end; +} + +//---------------------------------------------------------------------------- +// Template function to find an MDL item in a vector, by name. +//---------------------------------------------------------------------------- +template +A *FindItem(std::vector &items, const std::string &name) { + for (auto *a_item : items) + if (a_item->name() == name) + return a_item; + return nullptr; +} + +//---------------------------------------------------------------------------- +// Template function to find an MDL item in a map of pointers, by name. +//---------------------------------------------------------------------------- +template +A *FindItem(std::map &items, const std::string &name) { + auto it = items.find(name); + return (it == items.end()) ? nullptr : it->second; +} + +//---------------------------------------------------------------------------- +// "Internal" names contain a colon character, so that they never conflict +// with user-defined names. +//---------------------------------------------------------------------------- +inline bool is_catchall_name(const std::string &name) { + return name.find(':') != std::string::npos; +} + +//---------------------------------------------------------------------------- +// Define a base class that contains source information for each object. +//---------------------------------------------------------------------------- +class MdlItem { +public: + explicit MdlItem(antlr4::ParserRuleContext *ctx) + : line_(ctx->getStart()->getLine()), + column_(ctx->getStart()->getCharPositionInLine()) {} + explicit MdlItem(antlr4::ParserRuleContext *ctx, std::string &file_name) + : line_(ctx->getStart()->getLine()), + column_(ctx->getStart()->getCharPositionInLine()), + file_name_(file_name) {} + MdlItem(const MdlItem &item) + : line_(item.line()), column_(item.column()), + file_name_(item.file_name_) {} + MdlItem() : line_(0), column_(0) {} + + int line() const { return line_; } + int column() const { return column_; } + const std::string &file_name() const { return file_name_; } + std::string Location() const { + return llvm::formatv("{0}:{1}:{2}", file_name_, std::to_string(line_), + std::to_string(column_ + 1)); + } + +private: + int line_; // Lexical line number of this item. + int column_; // Lexical column number of this item. + std::string file_name_; +}; + +//---------------------------------------------------------------------------- +// An instance of a name. Used anywhere that an identifier is used +// in the machine description. 
+//---------------------------------------------------------------------------- +class Identifier : public MdlItem { +public: + // Create a general identifier. + Identifier(const MdlItem &item, std::string name) + : MdlItem(item), name_(name) {} + // Used to create identifiers used in resource groups. + Identifier(Identifier *item, int index) + : MdlItem(*item), name_(item->name_), index_(index) {} + // Used to generate internal names that don't map back to source code. + explicit Identifier(std::string name) : MdlItem(), name_(name) {} + + bool operator!=(const Identifier &rhs) { return name_ != rhs.name(); } + std::string ToString() const; + std::string const &name() const { return name_; } + void set_index(int index) { index_ = index; } + int index() const { return index_; } + bool is_vararg() const { return name_[0] == '$'; } + int vararg_index() const { return std::stoi(name_.substr(1)); } + bool is_number() const { return isdigit(name_[0]); } + int get_number() const { return std::stoi(name_); } + +private: + const std::string name_; // Name used anywhere in machine description. + int index_ = -1; // If in an IdList, its 0-based position in list. +}; + +//---------------------------------------------------------------------------- +// An instance of a pipe phase name, defined in a pipeline definition. +//---------------------------------------------------------------------------- +class PhaseName : public MdlItem { +public: + PhaseName(const MdlItem &item, std::string name, bool is_protected, + bool is_hard) + : MdlItem(item), name_(name), is_protected_(is_protected), + is_hard_(is_hard) {} + explicit PhaseName(std::string name) : MdlItem(), name_(name) {} + + std::string ToString() const; + std::string FormatProtection() const; + std::string const &name() const { return name_; } + void set_index(int index) { index_ = index; } + int index() const { return index_; } + bool is_protected() const { return is_protected_; } + bool is_unprotected() const { return !is_protected_; } + bool is_hard() const { return is_hard_; } + +private: + const std::string name_; // name of the pipeline phase + int index_ = -1; // If in an IdList, its 0-based position in list + bool is_protected_ = true; // true if this is in a protected pipeline + bool is_hard_ = false; // true if this is in a "hard" pipeline +}; + +//---------------------------------------------------------------------------- +// An instance of a register definition. +//---------------------------------------------------------------------------- +class RegisterDef : public MdlItem { +public: + RegisterDef(const MdlItem &item, Identifier *id) : MdlItem(item), id_(id) {} + + std::string ToString() const; + std::string const &name() const { return id_->name(); } + +private: + const Identifier *id_; // Identifier associated with register. +}; + +//---------------------------------------------------------------------------- +// Description of a register class. +//---------------------------------------------------------------------------- +class RegisterClass : public MdlItem { +public: + RegisterClass(const MdlItem &item, Identifier *id, RegisterDefList *members) + : MdlItem(item), id_(id), members_(members) {} + explicit RegisterClass(std::string name) : id_(new Identifier(name)) {} + + // Return true if decl is a superset of this class. 
+ bool IsSupersetOf(const RegisterClass *decl) const { + for (auto *reg : *decl->members_) + if (!FindItem(*members_, reg->name())) + return false; + return true; + } + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + RegisterDefList *members() const { return members_; } + bool IsNull() const { return name() == "__"; } + +private: + Identifier *id_; // Name of the class. + RegisterDefList *members_; // List of registers included in class. +}; + +//---------------------------------------------------------------------------- +// An instance argument which refers to a register class. +//---------------------------------------------------------------------------- +class RegisterClassRef : public MdlItem { +public: + explicit RegisterClassRef(RegisterClass *item) + : MdlItem(*item), id_(item->id()), regs_(item) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + RegisterClass *regs() { return regs_; } + +private: + Identifier *id_ = nullptr; // name of the referenced class + RegisterClass *regs_ = nullptr; // link to the referenced class +}; + +//---------------------------------------------------------------------------- +// Description of a pipeline phase group defined in the MDL. +// phases { phase1, phase2, ... }; +// Phases defined as ranges (E[3..5]) are expanded in this object. +// This object owns all the data pointed to by member pointers. +//---------------------------------------------------------------------------- +class PipePhases : public MdlItem { +public: + PipePhases(const MdlItem &item, Identifier *id, PhaseNameList *phases, + PhaseName *first_execute_phase, bool is_protected, bool is_hard) + : MdlItem(item), id_(id), phase_names_(phases), + first_execute_phase_name_(first_execute_phase), + is_protected_(is_protected), is_hard_(is_hard) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + PhaseNameList *phase_names() const { return phase_names_; } + bool is_protected() const { return is_protected_; } + bool is_hard() const { return is_hard_; } + PhaseName *first_execute_phase_name() const { + return first_execute_phase_name_; + } + +private: + Identifier *const id_ = nullptr; // name of pipeline phase group + PhaseNameList *const phase_names_ = nullptr; // names of each phase + PhaseName *const first_execute_phase_name_; // first execute phase + const bool is_protected_ = true; // true if the pipeline is protected + const bool is_hard_ = false; // true if the pipeline is hard vs soft +}; + +//---------------------------------------------------------------------------- +// Types of references used in Reference objects, and three function +// declarations for converting between these ids and strings. The functions +// (declared below) must be kept in sync with these definitions. The order +// of the definitions is significant: it represents the order reference +// lists are written out. 
+//---------------------------------------------------------------------------- +struct RefTypes { + using Item = int16_t; + static constexpr int kNull = 0; + static constexpr int kPred = 1; // use of a predicate operand + static constexpr int kUse = 2; // use of an operand and/or resources + static constexpr int kDef = 4; // def of an operand and use of resources + static constexpr int kKill = 8; // kill of an operand + static constexpr int kUseDef = 16; // operand use and def (use of a resource) + static constexpr int kHold = 32; // hold on availability of resources + static constexpr int kReserve = 64; // reserve resources until a given cycle + static constexpr int kFus = 128; // use a set of functional units + static constexpr int kCond = 256; // conditional reference + + static constexpr int kAnyDef = kDef | kUseDef | kKill; + static constexpr int kAnyUse = kPred | kUse | kUseDef; + static constexpr int kAnyUseDef = kAnyDef | kAnyUse; + static constexpr int kHoldReserve = kHold | kReserve; +}; +using RefType = RefTypes::Item; + +// Map a string from the mdl input file to a RefType. +extern RefType StringToRefType(const std::string &ref_type); +// Format a RefType for debug output. +extern std::string RefTypeToString(RefType ref_type); +// Format a RefType for database generation. +extern std::string FormatReferenceType(RefType ref_type); +// Format an aggregated RefType for database generation. +extern std::string FormatReferenceTypes(int ref_type); +// Format a reference flags field. +extern std::string FormatReferenceFlags(const Reference *ref); + +//---------------------------------------------------------------------------- +// A set of flags for describing scheduling attributes for operand, resource, +// and explicit functional unit references. These values are passed through +// the generated database, so their values must correspond to same-named +// values in MDLInfo.h. +//---------------------------------------------------------------------------- +struct RefFlags { + using Item = int8_t; + + // Reference flags for operand and resource references. + static constexpr int kNone = 0; + static constexpr int kProtected = 1; + static constexpr int kUnprotected = 2; + static constexpr int kDuplicate = 4; + + // Reference flags for explicit functional unit references. + static constexpr int kUnreserved = 1; + static constexpr int kBuffered = 2; + static constexpr int kBeginGroup = 4; + static constexpr int kEndGroup = 8; + static constexpr int kSingleIssue = 16; + static constexpr int kRetireOOO = 32; + + static bool is_protected(Item flag) { return flag & kProtected; } + static bool is_unprotected(Item flag) { return flag & kUnprotected; } + static bool is_duplicate(Item flag) { return flag & kDuplicate; } + static bool is_unreserved(Item flag) { return flag & kUnreserved; } + static bool is_buffered(Item flag) { return flag & kBuffered; } + static bool is_begin_group(Item flag) { return flag & kBeginGroup; } + static bool is_end_group(Item flag) { return flag & kEndGroup; } + static bool is_single_issue(Item flag) { return flag & kSingleIssue; } + static bool is_retire_ooo(Item flag) { return flag & kRetireOOO; } +}; + +//---------------------------------------------------------------------------- +// Resource pools can have subpools, we collect information about them. +// SubPools have an ordering based on how restrictive they are (number of +// resources they qualify for). The most restrictive pools are allocated +// first. 
+//---------------------------------------------------------------------------- +class SubPool { +public: + explicit SubPool(const ResourceRef *res); + int first() const { return first_; } + int last() const { return last_; } + int size() const { return last_ - first_; } + bool operator<(const SubPool &rhs) const { + return size() < rhs.size() || + (size() == rhs.size() && + (first() < rhs.first() || last() < rhs.last())); + } + bool operator>(const SubPool &item) const { return item < *this; } + std::string ToString() const; + +private: + int first_; // id of first member of the subpool + int last_; // id of last member of the subpool +}; + +// Information about a single subpool. +class SubPoolInfo { +public: + void set_subpool_id(int id) { subpool_id_ = id; } + void add_count(int count) { + if (count) + counts_.emplace(count); // Don't add zeros. + } + int subpool_id() const { return subpool_id_; } + const std::set &counts() const { return counts_; } + + std::string ToString(std::string subpool) const; + +private: + int subpool_id_ = -1; + std::set counts_; // the set of all non-zero count requests +}; + +// For each pooled reference, keep track of how many resources were requested. +using SubPools = std::map; + +enum class GroupType { kUseAll, kUseSingle }; + +//---------------------------------------------------------------------------- +// Definition of a single resource object defined in the MDL. +// resource ; +// resource { , , ... }; +// resource :; +// resource [pool_size>]; +// resource :[pool_size>]; +// This object owns all the data pointed to by member pointers. +//---------------------------------------------------------------------------- +class ResourceDef : public MdlItem { +public: + ResourceDef(const MdlItem &item, Identifier *id, int bits, int pool_size, + Identifier *start, Identifier *end) + : MdlItem(item), id_(id), bit_size_(bits), pool_size_(pool_size), + start_phase_(start), end_phase_(end) {} + ResourceDef(const MdlItem &item, Identifier *id, int bits, IdList *members, + Identifier *start, Identifier *end) + : MdlItem(item), id_(id), bit_size_(bits), members_(*members), + start_phase_(start), end_phase_(end) {} + ResourceDef(const MdlItem &item, Identifier *id) : MdlItem(item), id_(id) {} + explicit ResourceDef(Identifier *const id) : MdlItem(*id), id_(id) {} + explicit ResourceDef(std::string name) + : MdlItem(), id_(new Identifier(name)) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + IdList &members() { return members_; } + int bit_size() const { return bit_size_; } + bool has_shared_bits() const { return bit_size_ > 0; } + int pool_size() const { return pool_size_; } + Identifier *start_phase() const { return start_phase_; } + Identifier *end_phase() const { return end_phase_; } + bool IsNull() const { return name() == "__"; } + + bool IsPoolDef() const { return IsGroupDef() || pool_size_ > 0; } + bool IsGroupDef() const { return !members_.empty(); } + + void set_resource_id(int id) { resource_id_ = id; } + int get_resource_id() const { return resource_id_; } + int pool_id() const { return pool_id_; } + void set_pool_id(int pool_id) { pool_id_ = pool_id; } + void add_alloc_size(int size) { alloc_sizes_.emplace(size); } + std::set &alloc_sizes() { return alloc_sizes_; } + SubPoolInfo &sub_pool(SubPool &pool) { return sub_pools_[pool]; } + SubPools &sub_pools() { return sub_pools_; } + void AddReferenceSizeToPool(const ResourceRef *resource, const Reference *ref, + 
const SubUnitInstantiation *subunit); + + std::string resource_format(); + + int GetMemberId(const Identifier *member) const { + for (auto *mem : members_) + if (mem->name() == member->name()) + return mem->index(); + return -1; + } + + RegisterClass *reg_class() const { return reg_class_; } + void set_reg_class(RegisterClass *regs) { reg_class_ = regs; } + ResourceRef *port_resource() const { return port_resource_; } + void set_port_resource(ResourceRef *res) { port_resource_ = res; } + + void RecordReference(RefType type, const PhaseExpr *expr, + const ResourceRef *resource, const Reference *ref, + const SubUnitInstantiation *subunit); + + void set_debug_name(std::string type, const CpuInstance *cpu, + const ClusterInstance *cluster, + const FuncUnitInstantiation *fu); + std::string debug_name() const { return debug_name_; } + std::string ref_summary() const; + int ref_types() const { return ref_types_; } + bool is_used() const { return ref_types_ != 0; } + bool only_held() const { + return (ref_types_ & (RefTypes::kHoldReserve)) == RefTypes::kHold; + } + bool only_reserved() const { + return (ref_types_ & (RefTypes::kHoldReserve)) == RefTypes::kReserve; + } + + bool is_unreserved() const { return is_unreserved_; } + void set_unreserved(bool reserve) { is_unreserved_ = reserve; } + bool is_buffered() const { return is_buffered_; } + void set_buffered(bool buffered) { is_buffered_ = buffered; } + + int latest_ref() const { return latest_ref_; } + bool phase_expr_seen() const { return phase_expr_seen_; } + + std::vector &member_defs() { return member_defs_; } + void add_member_def(ResourceDef *def) { member_defs_.push_back(def); } + ResourceDef *get_member_def(int index) const { return member_defs_[index]; } + bool implicit_group() const { return implicit_group_; } + void set_implicit_group() { implicit_group_ = true; } + GroupType group_type() const { return group_type_; } + void set_group_type(GroupType type) { group_type_ = type; } + void set_pool_size(unsigned size) { pool_size_ = size; } + +private: + Identifier *const id_ = nullptr; // name of the referenced resource + const int bit_size_ = -1; // number of bits represented + IdList members_; // members of a named resource pool + int pool_size_ = 0; // number of elements in pool (or 0) + Identifier *start_phase_ = nullptr; // optional start phase id + Identifier *end_phase_ = nullptr; // optional end phase id + int resource_id_ = 0; // resource index for this object + int pool_id_ = -1; // id, if resource is first in a pool + + std::vector member_defs_; + bool implicit_group_ = false; // True if this is an implicit group def + GroupType group_type_ = GroupType::kUseSingle; + + ResourceRef *port_resource_ = nullptr; // resource port is connected to + RegisterClass *reg_class_ = nullptr; // optional constraint for a port + + int earliest_ref_ = -1; // earliest seen reference + int latest_ref_ = -1; // latest seen reference + bool phase_expr_seen_ = false; // true if there are phase expressions + int ref_types_ = 0; // OR of all seen reference types + std::string debug_name_; // pretty name for printing + std::set alloc_sizes_; // set of all pool size requests + SubPools sub_pools_; // Map of all subpools for this pool + bool is_unreserved_ = false; // is the resource unreserved? + bool is_buffered_ = false; // is the resource buffered? +}; + +//---------------------------------------------------------------------------- +// Use of a resource (used in a functional- or sub-unit instantiation. +// ... ... // Reference entire resource. 
+// ... . ... // Reference a member. +// ... : ... // Reference pool members. +// ... [..]... // Reference part of a pool. +// This object owns all the data pointed to by member pointers, except +// for the definition_ member. +//---------------------------------------------------------------------------- +class ResourceRef : public MdlItem { +public: + ResourceRef(const MdlItem &item, Identifier *id) : MdlItem(item), id_(id) {} + ResourceRef(const MdlItem &item, Identifier *id, int pool_count, + Identifier *pool_count_name, Identifier *value_name) + : MdlItem(item), id_(id), pool_count_(pool_count), + pool_count_name_(pool_count_name), value_name_(value_name) {} + ResourceRef(const MdlItem &item, Identifier *id, Identifier *member) + : MdlItem(item), id_(id), member_(member) {} + ResourceRef(const MdlItem &item, Identifier *id, int first, int last) + : MdlItem(item), id_(id), first_(first), last_(last) {} + explicit ResourceRef(std::string name) + : MdlItem(), id_(new Identifier(name)) {} + explicit ResourceRef(ResourceDef *def) + : MdlItem(*def), id_(def->id()), first_(def->pool_size() > 0 ? 0 : -1), + last_(def->pool_size() > 0 ? def->pool_size() - 1 : -1), + definition_(def) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + Identifier *member() const { return member_; } + int pool_count() const { return pool_count_; } + Identifier *pool_count_name() const { return pool_count_name_; } + + bool HasCount() const { + return pool_count_ != -1 || pool_count_name_ != nullptr; + } + Identifier *value_name() const { return value_name_; } + + int first() const { return first_; } + int last() const { return last_; } + int pool_size() const { + if (IsGroupDef()) + return definition_->members().size(); + return last_ - first_ + 1; + } + + bool IsNull() const { return name() == "__"; } + bool IsPool() const { return first_ != -1; } + bool IsGroupRef() const { return IsGroupDef() && !member(); } + bool IsGroupDef() const { + return definition_ != nullptr && definition_->IsGroupDef(); + } + bool implicit_group() const { + return IsGroupDef() && definition_->implicit_group(); + } + + bool IsArrayDef() const { + return definition_ != nullptr && IsPool() && definition_->pool_size(); + } + bool IsPooledResourceRef() const { + return (IsGroupRef() || (IsArrayDef() && !IsIndexed())) && !HasCount(); + } + bool IsUnqualifiedRef() const { return !member() && first() == -1; } + bool HasAllocation() const { return HasCount(); } + bool HasValueName() const { return value_name_ != nullptr; } + + int IsSubrange() const { return first_ != -1 && last_ != first_; } + int IsIndexed() const { return first_ != -1 && first_ == last_; } + + int member_id() const { return member_id_; } + + void set_first(int first) { first_ = first; } + void set_last(int last) { last_ = last; } + void set_pool_count(int count) { pool_count_ = count; } + void set_pool_count_name(Identifier *count) { pool_count_name_ = count; } + void set_use_all_members() { use_all_members_ = true; } + bool use_all_members() const { return use_all_members_; } + void set_value_name(Identifier *mask) { value_name_ = mask; } + void set_subrange(int first, int last) { + set_first(first); + set_last(last); + } + ResourceDef *definition() const { return definition_; } + void set_definition(ResourceDef *def) { definition_ = def; } + ResourceDef *get_port_definition() const { return definition_; } + int get_resource_id() const { + return definition_ ? 
definition_->get_resource_id() : -1; + } + int get_final_resource_id() const { + if (first() != -1 && first() == last()) + return get_resource_id() + first(); + return get_resource_id(); + } + + Params *get_parameter() { return arg_parameter_link_; } + void set_parameter(Params *parameter) { arg_parameter_link_ = parameter; } + int operand_index() const { return operand_index_; } + void set_operand_index(int id) { operand_index_ = id; } + bool has_operand_index() const { return operand_index_ != -1; } + +private: + Identifier *id_ = nullptr; // name of the referenced resource + Identifier *member_ = nullptr; // which member is named (a.b) + int pool_count_ = -1; // how many pool members (a:count) + bool use_all_members_ = false; // true if "resource:*" syntax specified + Identifier *pool_count_name_ = nullptr; // symbolic count of members + Identifier *value_name_ = nullptr; // name of operand value attribute + int first_ = -1, last_ = -1; // subrange of pool (a[2..4] or a[3]) + int member_id_ = -1; // index of a member reference + + // Links to related objects. + ResourceDef *definition_ = nullptr; // link to resource definition + Params *arg_parameter_link_ = nullptr; // arguments are linked to parameters + int operand_index_ = -1; // pooled resources are tied to an operand +}; + +//---------------------------------------------------------------------------- +// Reflect what is in the MDL for a single processor definition. +// This object owns all the data pointed to by const member pointers. +//---------------------------------------------------------------------------- +class CpuInstance : public MdlItem { +public: + CpuInstance(const MdlItem &item, Identifier *id, PipeDefList *pipe_phases, + ResourceDefList *issues, ResourceDefList *res, + int reorder_buffer_size, ClusterList *clusters, + ForwardStmtList *forward_stmts, + std::vector &llvm_names) + : MdlItem(item), id_(id), pipe_phases_(pipe_phases), issues_(issues), + resources_(res), reorder_buffer_size_(reorder_buffer_size), + clusters_(clusters), forward_stmts_(forward_stmts), + llvm_names_(llvm_names) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + ResourceDefList *issues() const { return issues_; } + ResourceDefList *resources() const { return resources_; } + int reorder_buffer_size() const { return reorder_buffer_size_; } + ClusterList *clusters() const { return clusters_; } + ForwardStmtList *forward_stmts() const { return forward_stmts_; } + std::vector &llvm_names() { return llvm_names_; } + bool needs_slot_resources() const { return needs_slot_resources_; } + void set_needs_slot_resources(bool value) { needs_slot_resources_ = value; } + + void add_cpu_resource(ResourceDef *resource, std::string type, + const CpuInstance *cpu, const ClusterInstance *cluster, + const FuncUnitInstantiation *fu) { + all_resources_.push_back(resource); + resource->set_debug_name(type, cpu, cluster, fu); + } + void add_fu_pool_size(int size) { fu_pool_sizes_.insert(size); } + std::set &fu_pool_sizes() { return fu_pool_sizes_; } + + ResourceDefList &all_resources() { return all_resources_; } + void add_pool_resource(ResourceDef *pool) { pool_resources_.push_back(pool); } + ResourceDefList &pool_resources() { return pool_resources_; } + PipeDefList *pipe_phases() { return pipe_phases_; } + + int max_resource_phase() const { return max_resource_phase_; } + void set_max_used_resource_id(int id) { max_used_resource_id_ = id; } + int max_used_resource_id() const 
{ return max_used_resource_id_; } + void set_max_resource_phase(int phase) { max_resource_phase_ = phase; } + int max_issue() const { return max_issue_; } + void set_max_issue(int issue) { max_issue_ = issue; } + int pool_count() const { return pool_count_; } + void set_pool_count(int pool_count) { pool_count_ = pool_count; } + int max_pool_allocation() const { return max_pool_allocation_; } + void set_max_pool_allocation(int size) { max_pool_allocation_ = size; } + int early_use_phase() const { return early_use_phase_; } + void set_early_use_phase(int phase) { early_use_phase_ = phase; } + int load_phase() { + for (const auto *p1 : *pipe_phases()) + if (const auto *item = FindItem(*p1->phase_names(), "LOAD_PHASE")) + return item->index(); + return 0; + } + int high_latency_def_phase() { + for (const auto *p1 : *pipe_phases()) + if (const auto *item = FindItem(*p1->phase_names(), "HIGH_PHASE")) + return item->index(); + return 0; + } + void set_max_fu_id(int last_id) { max_fu_id_ = last_id; } + int max_fu_id() const { return max_fu_id_; } + + FuncUnitInstances &func_unit_instances() { return func_unit_instances_; } + +private: + Identifier *const id_ = nullptr; // name of this CPU + PipeDefList *pipe_phases_ = nullptr; // locally defined pipe phases + ResourceDefList *const issues_ = nullptr; // issue slot resources + ResourceDefList *const resources_ = nullptr; // resources defined locally + int reorder_buffer_size_ = -1; // size of reorder buffer + ClusterList *const clusters_ = nullptr; // clusters defined + ForwardStmtList *const forward_stmts_ = nullptr; // forward statements + std::vector llvm_names_; // optional llvm names + ResourceDefList all_resources_; // all resources defined for CPU + ResourceDefList pool_resources_; // all pooled resources for CPU + FuncUnitInstances func_unit_instances_; // map of templates to instances + std::set fu_pool_sizes_; // set of fu allocation pools + + int max_used_resource_id_ = 0; // number of "used" resources + int max_resource_phase_ = 0; // latest resource "use" + int max_issue_ = 0; // maximum parallel issue size + int pool_count_ = 0; // number of pooled resources + int max_pool_allocation_ = 0; // max pool allocation size + int early_use_phase_ = -1; // earliest named "use" phase + bool needs_slot_resources_ = false; // True if we must model slots + int max_fu_id_ = 0; // Id of last func unit. +}; + +//---------------------------------------------------------------------------- +// Instance of a cluster defined in a processor description. +// This object owns all the data pointed to by member pointers. 
+//---------------------------------------------------------------------------- +class ClusterInstance : public MdlItem { +public: + ClusterInstance(const MdlItem &item, Identifier *id, ResourceDefList *issue, + ResourceDefList *res, FuncUnitInstList *fus, + ForwardStmtList *forward_stmts) + : MdlItem(item), id_(id), issues_(issue), resources_(res), + func_units_(fus), forward_stmts_(forward_stmts) {} + explicit ClusterInstance(FuncUnitInstance *func_unit) + : MdlItem(), id_(new Identifier("__")), issues_(new ResourceDefList), + resources_(new ResourceDefList), + func_units_(new FuncUnitInstList(1, func_unit)) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + bool IsNull() const { return name() == "__"; } + ResourceDefList *issues() const { return issues_; } + ResourceDefList *resources() const { return resources_; } + FuncUnitInstList *func_units() const { return func_units_; } + ForwardStmtList *forward_stmts() const { return forward_stmts_; } + void AddFuncUnitInstantiation(FuncUnitInstantiation *fu) { + fu_instantiations_.push_back(fu); + } + FuncUnitInstantiations &fu_instantiations() { return fu_instantiations_; } + // Debug: Dump out functional unit instantiations for this cluster. + void DumpFuncUnitInstantiations(); + +private: + Identifier *const id_ = nullptr; // name of this CPU + ResourceDefList *const issues_ = nullptr; // issue entries, if any + ResourceDefList *const resources_ = nullptr; // resources defined locally + FuncUnitInstList *const func_units_ = nullptr; // func units instantiated + ForwardStmtList *const forward_stmts_ = nullptr; // forward statements + + FuncUnitInstantiations fu_instantiations_; +}; + +//---------------------------------------------------------------------------- +// Instance of a functional unit referenced in a CPU or cluster. +// An instance of a functional unit can be "unreserved" - this is used to +// model itineraries, which don't directly tie instructions to functional units. +// +// A functional unit instance can be "pinned" to issue/encoding slots, which +// are specified in an "issue" statement: +// - f() -> slot : pin an instance to a specific slot. +// - f() -> slot1 | slot2 : pin an instance to one of several slots. +// - f() -> slot & slot : pin an instance to more than one slot. +// If there is no pinning specification, the instance can be issued in +// any slot. +// This object owns all the data pointed to by member pointers, except for +// the fu_template member. 
+//---------------------------------------------------------------------------- +class FuncUnitInstance : public MdlItem { +public: + FuncUnitInstance(const MdlItem &item, Identifier *type, Identifier *id, + bool unreserved, int buffer_size, ResourceRefList *args, + IdList *any, IdList *all) + : MdlItem(item), id_(id), type_(type), unreserved_(unreserved), + buffer_size_(buffer_size), args_(args), pin_all_(all), pin_any_(any) {} + explicit FuncUnitInstance(const std::string type) + : MdlItem(), id_(new Identifier(type)), type_(new Identifier(type)), + args_(new ResourceRefList), pin_all_(nullptr), pin_any_(nullptr) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + Identifier *type() const { return type_; } + bool is_unreserved() const { return unreserved_; } + int buffer_size() const { return buffer_size_; } + bool is_buffered() const { return buffer_size_ > 0; } + ResourceRefList *args() const { return args_; } + IdList *pin_any() const { return pin_any_; } + IdList *pin_all() const { return pin_all_; } + + void set_template(FuncUnitTemplate *temp) { fu_template_ = temp; } + FuncUnitTemplate *get_template() const { return fu_template_; } + + ResourceRefList *get_resource_slots_any() const { return fu_pin_any_; } + ResourceRefList *get_resource_slots_all() const { return fu_pin_all_; } + void set_resource_slots_any(ResourceRefList *res) { fu_pin_any_ = res; } + void set_resource_slots_all(ResourceRefList *res) { fu_pin_all_ = res; } + + // "catchall" unit names use a colon followed by their associated CPU name. + bool is_catchall_unit() const { return is_catchall_name(name()); } + +private: + Identifier *const id_ = nullptr; // name of this unit (optional) + Identifier *const type_ = nullptr; // template of this unit + bool unreserved_ = false; // Is this an unreserved FU? + int buffer_size_ = 0; // reservation station size + ResourceRefList *const args_ = nullptr; // arguments to this instance + IdList *const pin_all_ = nullptr; // FU needs more than one slot + IdList *const pin_any_ = nullptr; // set of slots FU can be pinned to + FuncUnitTemplate *fu_template_ = nullptr; // link to template + ResourceRefList *fu_pin_any_ = nullptr; // slot resource list + ResourceRefList *fu_pin_all_ = nullptr; // slot resource list +}; + +//---------------------------------------------------------------------------- +// Instance of a single functional unit forwarding statement. +//---------------------------------------------------------------------------- +using ForwardToSet = std::vector>; + +class ForwardStmt : public MdlItem { +public: + ForwardStmt(const MdlItem &item, Identifier *from_unit, ForwardToSet to_units) + : from_unit_(from_unit), to_units_(to_units) {} + + Identifier *from_unit() const { return from_unit_; } + const ForwardToSet &to_units() const { return to_units_; } + std::string ToString() const; + +private: + Identifier *from_unit_; + ForwardToSet to_units_; +}; + +//---------------------------------------------------------------------------- +// Instance of a sub-unit referenced in a functional unit. +// This object owns all the data pointed to by member pointers, except for +// the su_template member. 
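+// The optional predicate list guards where the instance applies: predicates
+// are checked against the enclosing CPU and functional unit names (see
+// FuncUnitInstantiation::ValidPredicate).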
+//---------------------------------------------------------------------------- +class SubUnitInstance : public MdlItem { +public: + SubUnitInstance(const MdlItem &item, Identifier *id, ResourceRefList *args, + IdList *predicates) + : MdlItem(item), id_(id), args_(args), predicates_(predicates) {} + SubUnitInstance(const MdlItem &item, Identifier *id) + : MdlItem(item), id_(id), args_(new ResourceRefList), + predicates_(nullptr) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + ResourceRefList *args() const { return args_; } + IdList *predicates() const { return predicates_; } + void set_template(SubUnitTemplate *temp) { su_template_ = temp; } + SubUnitTemplate *get_template() { return su_template_; } + +private: + Identifier *const id_ = nullptr; // name of subunit template + ResourceRefList *const args_ = nullptr; // arguments passed to the instance + IdList *const predicates_ = nullptr; // predicates guarding instance + SubUnitTemplate *su_template_ = nullptr; // link to subunit template +}; + +//---------------------------------------------------------------------------- +// Instance of a latency referenced in a subunit. +// This object owns all the data pointed to by member pointers, except for +// the lat_template member. +//---------------------------------------------------------------------------- +class LatencyInstance : public MdlItem { +public: + LatencyInstance(const MdlItem &item, Identifier *id, ResourceRefList *args, + IdList *predicates) + : MdlItem(item), id_(id), args_(args), predicates_(predicates) {} + explicit LatencyInstance(const std::string name) + : MdlItem(), id_(new Identifier(name)), args_(new ResourceRefList), + predicates_(nullptr) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + ResourceRefList *args() const { return args_; } + IdList *predicates() const { return predicates_; } + + void set_template(LatencyTemplate *temp) { lat_template_ = temp; } + LatencyTemplate *get_template() { return lat_template_; } + +private: + Identifier *const id_ = nullptr; // which latency to instantiate + ResourceRefList *const args_ = nullptr; // instantiation arguments + IdList *const predicates_ = nullptr; // predicates guarding instance + LatencyTemplate *lat_template_ = nullptr; // link to template +}; + +//---------------------------------------------------------------------------- +// Template parameters for functional units, subunits, and latencies. +// This object owns all the data pointed to by member pointers. 
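+// Each parameter names a port, a register class, or a resource; the kind is
+// recorded in ParamType and queried with IsPort(), IsClass(), and
+// IsResource().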
+//---------------------------------------------------------------------------- +enum ParamType { kParamPort, kParamClass, kParamResource }; + +class Params : public MdlItem { +public: + Params(const MdlItem &item, Identifier *id, ParamType type) + : MdlItem(item), id_(id), type_(type) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + ParamType type() const { return type_; } + + bool IsClass() const { return type_ == kParamClass; } + bool IsPort() const { return type_ == kParamPort; } + bool IsResource() const { return type_ == kParamResource; } + +private: + Identifier *const id_ = nullptr; // name of this parameter + const ParamType type_ = kParamPort; // port, register class, or resource +}; + +//---------------------------------------------------------------------------- +// Template definition of a functional unit. +// This object owns all the data pointed to by member pointers, except for +// the template definition pointers in the unit_bases_ vector. +//---------------------------------------------------------------------------- +class FuncUnitTemplate : public MdlItem { +public: + FuncUnitTemplate(const MdlItem &item, Identifier *id, IdList *bases, + ParamsList *params, IdList *ports, ResourceDefList *res, + ConnectList *conn, SubUnitInstList *su) + : MdlItem(item), id_(id), bases_(bases), params_(params), ports_(ports), + resources_(res), connections_(conn), subunits_(su) {} + FuncUnitTemplate(MdlItem &item, Identifier *id, IdList *bases) + : MdlItem(item), id_(id), bases_(bases), params_(new ParamsList), + ports_(new IdList), resources_(new ResourceDefList), + connections_(new ConnectList), subunits_(new SubUnitInstList), + is_implicitly_defined_(true) {} + explicit FuncUnitTemplate(Identifier *id) + : MdlItem(), id_(id), bases_(new IdList), params_(new ParamsList), + ports_(new IdList), resources_(new ResourceDefList), + connections_(new ConnectList), subunits_(new SubUnitInstList), + is_implicitly_defined_(true) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + IdList *bases() const { return bases_; } + ParamsList *params() const { return params_; } + IdList *ports() const { return ports_; } + ResourceDefList *resources() const { return resources_; } + ConnectList *connections() const { return connections_; } + SubUnitInstList *subunits() const { return subunits_; } + void clone_params(ParamsList *params) { + params_ = new ParamsList; + for (auto *param : *params) + params_->push_back(new Params(*param)); + } + void add_subunit_instance(SubUnitInstance *su) { subunits_->push_back(su); } + + void add_base(FuncUnitTemplate *base) { unit_bases_.push_back(base); } + FuncUnitList &unit_bases() { return unit_bases_; } + bool is_implicitly_defined() const { return is_implicitly_defined_; } + + std::set &client_cpus() { return client_cpus_; } + void add_client_cpu(const std::string &cpu) { client_cpus_.insert(cpu); } + +private: + Identifier *const id_ = nullptr; // name of this template + IdList *const bases_ = nullptr; // base template ids, if any + ParamsList *params_ = nullptr; // parameters defined for unit + IdList *const ports_ = nullptr; // ports defined in this unit + ResourceDefList *const resources_ = nullptr; // resources defined locally + ConnectList *const connections_ = nullptr; // connect statements in unit + SubUnitInstList *const subunits_ = nullptr; // subunits instantiated + FuncUnitList 
unit_bases_; // functional unit bases + bool is_implicitly_defined_ = false; + std::set client_cpus_; // cpus that use this FU. +}; + +//---------------------------------------------------------------------------- +// Definition of a functional unit template group. +// Each item assigns a name to a group of functional units. +//---------------------------------------------------------------------------- +class FuncUnitGroup : public MdlItem { +public: + FuncUnitGroup(const MdlItem &item, Identifier *id, int buffer_size, + IdList *members) + : MdlItem(item), id_(id), buffer_size_(buffer_size), members_(members) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + int buffer_size() const { return buffer_size_; } + IdList *members() const { return members_; } + FuncUnitList &fu_members() { return fu_members_; } + void add_unit(FuncUnitTemplate *unit) { fu_members_.push_back(unit); } + +private: + Identifier *const id_ = nullptr; // name of the group + int buffer_size_ = -1; // size of input buffer + IdList *const members_ = nullptr; // members of the group + FuncUnitList fu_members_; // links to templates +}; + +//---------------------------------------------------------------------------- +// Describes each connect statement in a functional unit template. +// This object owns all the data pointed to by member pointers. +//---------------------------------------------------------------------------- +class Connect : public MdlItem { +public: + Connect(const MdlItem &item, Identifier *id, Identifier *rclass, + ResourceRef *resource) + : MdlItem(item), id_(id), reg_class_(rclass), resource_(resource) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + Identifier *reg_class() const { return reg_class_; } + ResourceRef *resource() const { return resource_; } + +private: + Identifier *const id_ = nullptr; // name of referenced port + Identifier *const reg_class_ = nullptr; // register class connected to + ResourceRef *const resource_ = nullptr; // resource being referenced +}; + +//---------------------------------------------------------------------------- +// Description of a single functional unit reservation in subunit templates. +//---------------------------------------------------------------------------- +class FuncUnitUse : public MdlItem { +public: + FuncUnitUse(const MdlItem &item, Identifier *predicate, Identifier *func_unit, + int cycles) + : MdlItem(item), predicate_(predicate), func_unit_(func_unit), + cycles_(cycles) {} + + Identifier *predicate() const { return predicate_; } + Identifier *func_unit() const { return func_unit_; } + std::string name() const { return func_unit_->name(); } + int cycles() const { return cycles_; } + std::string ToString() const { + std::string out; + if (predicate_) + out += predicate_->name() + ":"; + out += func_unit_->name(); + if (cycles_ != -1) + out += llvm::formatv("<{0}>", cycles_); + return out; + } + +private: + Identifier *predicate_; // optional predicate + Identifier *func_unit_; // used functional unit name + int cycles_; // number of cycles reserved +}; + +//---------------------------------------------------------------------------- +// Template definition of a subunit. +// This object owns all the data pointed to by member pointers, except for +// the su_base member. 
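+// In addition to explicit base subunits, a subunit template can be tied to
+// instructions whose names match the regular expressions in regex_bases_
+// (see MdlSpec::TieSubUnitToInstructions).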
+//---------------------------------------------------------------------------- +class SubUnitTemplate : public MdlItem { +public: + SubUnitTemplate(const MdlItem &item, Identifier *type, IdList *bases, + StringList *regex_bases, ParamsList *params, + LatencyInstList *latencies, LatencyTemplate *inline_latency) + : MdlItem(item), type_(type), bases_(bases), regex_bases_(regex_bases), + params_(params), latencies_(latencies), + inline_latency_(inline_latency) {} + SubUnitTemplate(const std::string type, LatencyInstance *latency, + LatencyTemplate *inline_latency) + : MdlItem(), type_(new Identifier(type)), bases_(nullptr), + params_(new ParamsList), latencies_(new LatencyInstList(1, latency)), + inline_latency_(inline_latency) {} + + std::string ToString() const; + + Identifier *type() const { return type_; } + std::string const &name() const { return type_->name(); } + IdList *bases() const { return bases_; } + StringList *regex_bases() const { return regex_bases_; } + ParamsList *params() const { return params_; } + LatencyInstList *latencies() const { return latencies_; } + LatencyTemplate *inline_latency() { return inline_latency_; } + + void add_base(SubUnitTemplate *unit) { unit_bases_.push_back(unit); } + SubUnitList &unit_bases() { return unit_bases_; } + void add_derived_subunit(SubUnitTemplate *derived) { + if (FindItem(derived_subunits_, derived->name())) + return; + derived_subunits_.push_back(derived); + } + SubUnitList &derived_subunits() { return derived_subunits_; } + + int use_count() const { return use_count_; } + void inc_use() { use_count_++; } + +private: + Identifier *const type_ = nullptr; // type of this subunit + IdList *const bases_ = nullptr; // base subunits (or empty) + StringList *const regex_bases_ = nullptr; // matching instructions + ParamsList *const params_ = nullptr; // unit parameters + LatencyInstList *const latencies_ = nullptr; // which latencies to use + LatencyTemplate *const inline_latency_ = nullptr; // inline latency template + SubUnitList unit_bases_; // link to base templates + SubUnitList derived_subunits_; // derived subunits + int use_count_ = 0; // was it ever referenced? +}; + +//---------------------------------------------------------------------------- +// Template definition of a latency. +// This object owns all the data pointed to by member pointers, except for +// the lat_base member. 
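+// A latency template may list base templates (base_ids_); when a subunit is
+// instantiated, references from the template and all of its bases are
+// instantiated recursively (see SubUnitInstantiation::InstantiateLatencyBases).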
+//---------------------------------------------------------------------------- +class LatencyTemplate : public MdlItem { +public: + LatencyTemplate(const MdlItem &item, Identifier *id, IdList *bases, + ParamsList *params, ReferenceList *refs) + : MdlItem(item), id_(id), base_ids_(bases), params_(params), + references_(refs) {} + LatencyTemplate(std::string name, ReferenceList *refs) + : MdlItem(), id_(new Identifier(name)), base_ids_(nullptr), + params_(new ParamsList), references_(refs) {} + + std::string ToString() const; + + Identifier *id() const { return id_; } + std::string const &name() const { return id_->name(); } + IdList *base_ids() const { return base_ids_; } + ParamsList *params() const { return params_; } + ReferenceList *references() const { return references_; } + + std::set *referenced_fus() { return referenced_fus_; } + void set_referenced_fus(std::set *fus) { referenced_fus_ = fus; } + + void add_base(LatencyTemplate *temp) { unit_bases_.push_back(temp); } + LatencyList &unit_bases() { return unit_bases_; } + +private: + Identifier *const id_ = nullptr; // which latency to instantiate + IdList *const base_ids_ = nullptr; // base latencies (or empty) + ParamsList *const params_ = nullptr; // parameters for this unit + ReferenceList *const references_ = nullptr; // all refs in template + LatencyList unit_bases_; // links to base templates + std::set *referenced_fus_ = nullptr; // set of referenced FUs +}; + +//---------------------------------------------------------------------------- +// Description of an instruction operand reference, used in latency +// rules to explicitly reference an operand, and for immediate operands +// in phase expressions. This object owns all the data pointed to by member +// pointers. +//---------------------------------------------------------------------------- +class OperandRef : public MdlItem { +public: + OperandRef(const MdlItem &item, Identifier *type, IdList *names) + : MdlItem(item), op_type_(type), op_names_(names) {} + OperandRef(Identifier *type, IdList *names, int operand_index) + : MdlItem(), op_type_(type), op_names_(names), + operand_index_(operand_index) {} + OperandRef(Identifier *type, IdList *names, RegisterClass *reg_class) + : MdlItem(), op_type_(type), op_names_(names), reg_class_(reg_class) {} + OperandRef(Identifier *type, IdList *names, RegisterClass *reg_class, + int operand_index) + : MdlItem(), op_type_(type), op_names_(names), reg_class_(reg_class), + operand_index_(operand_index) {} + explicit OperandRef(std::string default_name) + : MdlItem(), op_type_(new Identifier(default_name)), + op_names_(new IdList({new Identifier(default_name)})) {} + + std::string ToString() const; + + Identifier *op_type() const { return op_type_; } + IdList *op_names() const { return op_names_; } + std::string const &name() const { return (*op_names_)[0]->name(); } + std::string type_name() const; + + int operand_index() const { return operand_index_; } + void set_operand_index(int index) { operand_index_ = index; } + + OperandDef *operand() const { return operand_; } + void set_operand(OperandDef *operand) { operand_ = operand; } + RegisterClass *reg_class() const { return reg_class_; } + void set_regclass(RegisterClass *reg_class) { reg_class_ = reg_class; } + OperandDecl *operand_decl() const { return operand_decl_; } + void set_operand_decl(OperandDecl *decl) { operand_decl_ = decl; } + +private: + // Basic information that reflects directly what was in the input spec. 
+ Identifier *const op_type_ = nullptr; // name of operand type (or null) + IdList *const op_names_ = nullptr; // names of operand and suboperands + + // The type of a reference can be either an operand type or a register class. + // These link the reference to one of those object types. + OperandDef *operand_ = nullptr; // pointer to associated operand type + RegisterClass *reg_class_ = nullptr; // pointer to associated register class + + // Links to more detailed information about how the reference is used. + // This information is generated when we generate instruction information. + int operand_index_ = -1; // index of operand in instruction + OperandDecl *operand_decl_ = nullptr; // pointer to operand declaration +}; + +//---------------------------------------------------------------------------- +// Description of an expression used to specify a pipeline phase in a +// latency rule. This object owns all the data pointed to by member pointers. +//---------------------------------------------------------------------------- +enum PhaseOp { + kPlus, + kMinus, + kMult, + kDiv, + kNeg, + kPositive, + kPhase, + kInt, + kOpnd +}; + +class PhaseExpr : public MdlItem { +public: + PhaseExpr(const MdlItem &item, PhaseOp op, PhaseExpr *left, PhaseExpr *right) + : MdlItem(item), operation_(op), left_(left), right_(right) {} + PhaseExpr(const MdlItem &item, PhaseOp op, int number) + : MdlItem(item), operation_(op), number_(number) {} + PhaseExpr(const MdlItem &item, PhaseOp op, Identifier *phase) + : MdlItem(item), operation_(op), phase_(phase) {} + PhaseExpr(const MdlItem &item, PhaseOp op, OperandRef *operand) + : MdlItem(item), operation_(op), operand_(operand) {} + PhaseExpr(Identifier *name, PhaseName *phase) + : MdlItem(), operation_(kPhase), phase_name_(phase), phase_(name) {} + explicit PhaseExpr(PhaseName *phase) + : MdlItem(), operation_(kPhase), phase_name_(phase), + phase_(new Identifier(phase->name())) {} + explicit PhaseExpr(int phase) : MdlItem(), operation_(kInt), number_(phase) {} + + PhaseExpr *clone() { + if (operation() == kPhase) + return new PhaseExpr(phase_, phase_name_); + if (operation() == kInt) + return this; + if (operation() == kOpnd) + return new PhaseExpr(*this, kOpnd, new OperandRef(*operand_)); + + PhaseExpr *nleft = left() ? left()->clone() : nullptr; + PhaseExpr *nright = right() ? right()->clone() : nullptr; + return new PhaseExpr(*this, operation(), nleft, nright); + } + + static PhaseExpr *DefaultLatency() { return new PhaseExpr(-1); } + bool IsDefaultLatency() { return operation_ == kInt && number_ == -1; } + + // Methods for evaluating and checking validity/const-ness of expressions. + bool IsExpressionConstant() const; + bool IsExpressionLegal() const; + bool IsSubexpressionLegal() const; + int EvaluateConstantExpression() const; + int ConstantPhase() { + if (IsExpressionLegal() && IsExpressionConstant()) + return EvaluateConstantExpression(); + else + return -1; + } + + // Are two phase expressions identical? + // We don't handle the general case of operand references here, those are + // considered unequal if they have different operand ids. 
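+  // Comparison is structural: the operation and any children must match;
+  // named phases compare by their PhaseName entry, integer phases by value,
+  // and operand references by operand_index().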
+ bool operator==(const PhaseExpr &item) { + if (operation_ != item.operation_) + return false; + if (left_ && item.left_ && *left_ != *item.left_) + return false; + if (right_ && item.right_ && *right_ != *item.right_) + return false; + if (operation_ == PhaseOp::kPhase) + return phase_name_ == item.phase_name_; + if (operation_ == PhaseOp::kInt) + return number_ == item.number_; + if (operation_ == PhaseOp::kOpnd) + return operand_ && item.operand_ && + operand_->operand_index() == item.operand_->operand_index(); + return true; + } + bool operator!=(const PhaseExpr &item) { return !(*this == item); } + + // Add a small constant to a phase expression. + PhaseExpr *increment(int increment) { + if (increment == 0) + return this->clone(); + return new PhaseExpr(*this, PhaseOp::kPlus, this->clone(), + new PhaseExpr(*this, kInt, increment)); + } + std::string ToString() const; + std::string FormatProtection() const { + auto *phase_name = GetPhaseName(); + if (phase_name == nullptr) + return ""; + return phase_name->FormatProtection(); + } + bool IsProtected() const { return GetPhaseName()->is_protected(); } + bool IsUnprotected() const { return !GetPhaseName()->is_protected(); } + + PhaseOp operation() const { return operation_; } + PhaseExpr *left() const { return left_; } + PhaseExpr *right() const { return right_; } + int number() const { return number_; } + OperandRef *operand() const { return operand_; } + Identifier *phase() const { return phase_; } + int phase_id() const { return phase_name_->index(); } + PhaseName *GetPhaseName() const; + bool HasPhaseName() const; + void set_phase_name(PhaseName *name) { phase_name_ = name; } + +private: + const PhaseOp operation_; // operation of the expression + PhaseExpr *const left_ = nullptr; // child operations + PhaseExpr *const right_ = nullptr; // child operations + PhaseName *phase_name_ = nullptr; // Pointer to phase name item. + union { + const int number_; // integer constant + OperandRef *const operand_; // reference to an instruction operand + Identifier *const phase_; // reference to a phase name + }; +}; + +//---------------------------------------------------------------------------- +// Enumerate the kinds of predicate expressions we support. +//---------------------------------------------------------------------------- +enum class PredOp { + kCheckAny, // Compound OR predicate + kCheckAll, // Compound AND predicate + kCheckNot, // Logical NOT predicate + kCheckOpcode, // Check instruction against a list of opcodes + kCheckIsRegOperand, // Check that an operand is a register + kCheckRegOperand, // Check that an operand is a particular register + kCheckInvalidRegOperand, // Check that an operand is an invalid register + kCheckSameRegOperand, // Check if two operands are the same register + kCheckIsImmOperand, // Check that an operand is an immediate + kCheckImmOperand, // Check for a particular immediate operand + kCheckZeroOperand, // Check that an operand is zero + kCheckFunctionPredicate, // Function to call to implement predicate + kCheckFunctionPredicateWithTII, // Function to call to implement predicate + kCheckNumOperands, // Check that an instr has some number of opnds + kOpcodeSwitchStmt, // Switch statement + kOpcodeSwitchCase, // Single case statement + kReturnStatement, // Switch return statement + kName, // Register name, Predicate name, Opcode Name, etc + kNumber, // An integer operand + kOperandRef, // A named operand reference. 
+ kString, // A string operand + kCode, // String representing C code + kTrue, // Predicate always returns TRUE + kFalse, // Predicate always returns FALSE + kEmpty, // Undefined predicate +}; + +//---------------------------------------------------------------------------- +// Names that correspond to LLVM predicate operators. +//---------------------------------------------------------------------------- +constexpr auto kCheckAny = "CheckAny"; +constexpr auto kCheckAll = "CheckAll"; +constexpr auto kCheckNot = "CheckNot"; +constexpr auto kCheckOpcode = "CheckOpcode"; +constexpr auto kCheckIsRegOperand = "CheckIsRegOperand"; +constexpr auto kCheckRegOperand = "CheckRegOperand"; +constexpr auto kCheckInvalidRegOperand = "CheckInvalidRegOperand"; +constexpr auto kCheckSameRegOperand = "CheckSameRegOperand"; +constexpr auto kCheckIsImmOperand = "CheckIsImmOperand"; +constexpr auto kCheckImmOperand = "CheckImmOperand"; +constexpr auto kCheckZeroOperand = "CheckZeroOperand"; +constexpr auto kCheckFunctionPredicate = "CheckFunctionPredicate"; +constexpr auto kCheckFunctionPredicateWithTII = "CheckFunctionPredicateWithTII"; +constexpr auto kCheckNumOperands = "CheckNumOperands"; +constexpr auto kOpcodeSwitchStmt = "OpcodeSwitchStatement"; +constexpr auto kOpcodeSwitchCase = "OpcodeSwitchCase"; +constexpr auto kReturnStatement = "ReturnStatement"; +constexpr auto kName = "Name"; +constexpr auto kNumber = "Number"; +constexpr auto kOperand = "Operand"; +constexpr auto kString = "String"; +constexpr auto kCode = "Code"; +constexpr auto kTrue = "TruePred"; +constexpr auto kFalse = "FalsePred"; +constexpr auto kEmpty = "Empty"; + +//---------------------------------------------------------------------------- +// Definition of a predicate expression. +//---------------------------------------------------------------------------- +class PredExpr : public MdlItem { +public: + explicit PredExpr(PredOp opcode) : MdlItem(), opcode_(opcode) {} + PredExpr(const MdlItem &item, PredOp opcode) + : MdlItem(item), opcode_(opcode) {} + PredExpr(const MdlItem &item, PredOp opcode, std::string value) + : MdlItem(item), opcode_(opcode), value_(value) {} + PredExpr(const MdlItem &item, PredOp opcode, OperandRef *opnd) + : MdlItem(item), opcode_(opcode), opnd_(opnd) {} + PredExpr(const MdlItem &item, PredOp opcode, PredExpr *opnd) + : MdlItem(item), opcode_(opcode), operands_({opnd}) {} + PredExpr(const MdlItem &item, PredOp opcode, std::vector &opnds) + : MdlItem(item), opcode_(opcode), operands_(opnds) {} + PredExpr(PredOp opcode, bool negate) : MdlItem(), opcode_(opcode) { + if (negate) + opcode = IsTrue() ? PredOp::kFalse : PredOp::kTrue; + } + + PredOp opcode() const { return opcode_; } + void set_opcode(PredOp opcode) { opcode_ = opcode; } + std::string value() const { return value_; } + OperandRef *opnd() const { return opnd_; } + std::vector &operands() { return operands_; } + std::string PredName(); + std::string ToString(int indent); + bool negate() const { return negate_; } + void set_negate() { negate_ = !negate_; } + void reset_negate() { negate_ = false; } + + bool IsTrue() const { return opcode_ == PredOp::kTrue; } + bool IsFalse() const { return opcode_ == PredOp::kFalse; } + bool IsEmpty() const { return opcode_ == PredOp::kEmpty; } + + // Functions to generate code for reference predicates. 
+ std::string GetOperand(PredExpr *index) const; + std::string OperandType() const; + std::string InvalidRegOperand() const; + std::string RegOperand(const std::string &family) const; + std::string SameRegOperand() const; + std::string ImmOperand() const; + std::string ImmZeroOperand() const; + std::string FunctionPredicate(bool withTII, OutputState *spec) const; + std::string NumOperands() const; + std::string CheckCompound(OutputState *spec); + std::string CheckCode(OutputState *spec) const; + +private: + PredOp opcode_; + bool negate_ = false; // perform a logical NOT of the operation + union { + std::string value_; // Value of this op + OperandRef *opnd_; // Reference to a named operand + std::vector operands_; // Operands of this op (0..n) + }; +}; + +//---------------------------------------------------------------------------- +// Description of a single latency rule as described in MDL. +// This object owns all the data pointed to by member pointers. +//---------------------------------------------------------------------------- +class Reference : public MdlItem { +public: + // This constructor is used by visitors to create a new basic reference. + Reference(const MdlItem &item, IdList *predicates, RefType ref_type, + PhaseExpr *phase_expr, int repeat, int delay, int use_cycles, + OperandRef *operand, ResourceRefList *resources) + : MdlItem(item), predicates_(predicates), conditional_ref_(nullptr), + ref_type_(ref_type), phase_expr_(phase_expr), use_cycles_(use_cycles), + repeat_(repeat), delay_(delay), operand_(operand), + resources_(resources) {} + + // This constructor creates a conditional reference. + Reference(const MdlItem &item, IdList *predicates, ConditionalRef *ref) + : MdlItem(item), predicates_(predicates), conditional_ref_(ref), + resources_(new ResourceRefList) {} + + // This constructor is used to generate RefFus entries, which includes + // an explicit "micro_op" value. + Reference(MdlItem &item, RefType ref_type, PhaseExpr *phase_expr, int cycles, + int micro_ops, RefFlags::Item fu_flags, ResourceRef *unit) + : MdlItem(item), ref_type_(ref_type), phase_expr_(phase_expr), + use_cycles_(cycles), repeat_(0), delay_(0), micro_ops_(micro_ops), + fu_flags_(fu_flags), resources_(new ResourceRefList(1, unit)) {} + // This constructor creates a RefFus entry that -only- has a micro-op. + Reference(MdlItem &item, int micro_ops, RefFlags::Item fu_flags) + : MdlItem(item), ref_type_(RefTypes::kFus), + phase_expr_(new PhaseExpr(new PhaseName("E1"))), phase_value_(1), + use_cycles_(0), repeat_(0), delay_(0), micro_ops_(micro_ops), + fu_flags_(fu_flags), resources_(new ResourceRefList) {} + + // This constructor creates default references to a "pseudo" functional unit + // for instructions which have no functional unit specifications. + Reference(RefType ref_type, PhaseName *phase, std::string func_unit_name) + : MdlItem(), ref_type_(ref_type), phase_expr_(new PhaseExpr(phase)), + resources_(new ResourceRefList(1, new ResourceRef(func_unit_name))) {} + + // This constructor is used while instantiating subunits to create a + // copy of a latency reference. We don't copy normal resources at this + // point, since they need to be bound to template parameters, and this is + // done in the caller. We do copy functional unit resource references tho. + Reference(Reference *item, PhaseExpr *phase) + : MdlItem(*item), predicates_(item->predicates()), + conditional_ref_(item->conditional_ref()), ref_type_(item->ref_type()), + phase_expr_(phase ? 
phase : item->phase_expr()), + use_cycles_(item->use_cycles()), micro_ops_(item->micro_ops()), + operand_(item->operand()), resources_(new ResourceRefList), + port_(nullptr), base_(item) { + if (item->IsFuncUnitRef() && !item->resources_->empty()) + resources_->push_back(new ResourceRef(*(*item->resources_)[0])); + } + + // This constructor is used while instantiating subunits to create a copy + // of a conditional latency reference. + Reference(Reference *item, ConditionalRef *cond) + : MdlItem(*item), predicates_(item->predicates()), conditional_ref_(cond), + ref_type_(item->ref_type()), phase_expr_(item->phase_expr()), + resources_(new ResourceRefList) {} + + // This constructor is used when creating the instruction database, and + // we want to specialize operand references to the instruction they are + // associated with. + Reference(Reference &item, int delay) + : MdlItem(item), predicates_(item.predicates()), + conditional_ref_(item.conditional_ref()), ref_type_(item.ref_type()), + phase_expr_(item.phase_expr() ? item.phase_expr()->increment(delay) + : nullptr), + use_cycles_(item.use_cycles()), repeat_(item.repeat()), + delay_(item.delay()), micro_ops_(item.micro_ops()), + fu_flags_(item.fu_flags()), + operand_(item.operand() ? new OperandRef(*item.operand()) : nullptr), + resources_(new ResourceRefList), port_(item.port()), base_(&item) { + for (auto *ref : *item.resources()) + resources_->push_back(new ResourceRef(*ref)); + } + + Reference(RefType ref_type, PhaseExpr *phase, OperandRef *operand) + : MdlItem(), ref_type_(ref_type), phase_expr_(phase->clone()), + operand_(operand), resources_(new ResourceRefList) {} + Reference(RefType ref_type, PhaseName *phase, OperandRef *operand) + : MdlItem(), ref_type_(ref_type), phase_expr_(new PhaseExpr(phase)), + operand_(operand), resources_(new ResourceRefList) {} + + std::string ToString() const; + IdList *predicates() const { return predicates_; } + ConditionalRef *conditional_ref() const { return conditional_ref_; } + bool IsConditionalRef() const { return conditional_ref_ != nullptr; } + + PhaseExpr *phase_expr() const { return phase_expr_; } + void set_phase_expr(PhaseName *name) { phase_expr_ = new PhaseExpr(name); } + + int use_cycles() const { return use_cycles_; } + int repeat() const { return repeat_; } + int delay() const { return delay_; } + int micro_ops() const { return micro_ops_; } + int fu_flags() const { return fu_flags_; } + OperandRef *operand() const { return operand_; } + ResourceRefList *resources() const { return resources_; } + + RefType ref_type() const { return ref_type_; } + void set_ref_type(RefType type) { ref_type_ = type; } + + bool IsOperandRefType() const { + return ref_type_ > RefTypes::kNull && ref_type_ < RefTypes::kHold; + } + bool IsResourceRef() const { + return ref_type_ == RefTypes::kHold || ref_type_ == RefTypes::kReserve; + } + bool IsDef() const { return ref_type_ & RefTypes::kDef; } + bool IsUse() const { return ref_type_ & RefTypes::kUse; } + bool IsFuncUnitRef() const { return ref_type_ == RefTypes::kFus; } + + RefType AdjustResourceReferenceType() const { + if (ref_type_ & (RefTypes::kHold | RefTypes::kReserve | RefTypes::kFus)) + return ref_type_; + return !operand() ? 
ref_type_ : RefTypes::kUse; + } + bool IsDefaultOperandRef() { + return operand_ && operand_->operand_index() == -1; + } + + void add_resource(ResourceRef *res) { resources_->push_back(res); } + void add_port(ResourceDef *port) { port_ = port; } + ResourceDef *port() const { return port_; } + void set_used() { + this->used_ = true; + for (auto *item = this; item->base_; item = item->base_) + item->base_->used_ = true; + } + bool used() const { return used_; } + void set_seen() { + this->seen_ = true; + for (auto *item = this; item->base_; item = item->base_) + item->base_->seen_ = true; + } + bool seen() const { return seen_; } + + // References are ordered by pipeline phase, then by reference type. + // If the pipeline phase is non-trivial, its value is -1, and ordered last. + bool operator<(const Reference &item) const { + if (phase_value_ != item.phase_value_) { + if (phase_value_ == -1) + return false; + if (item.phase_value_ == -1) + return true; + return phase_value_ < item.phase_value_; + } + if (phase_expr_ != nullptr && item.phase_expr_ != nullptr) + return phase_expr_->ToString() < item.phase_expr_->ToString(); + + if (ref_type() != item.ref_type()) + return ref_type_ < item.ref_type_; + + if (operand() != nullptr && item.operand() != nullptr && + operand()->operand_index() != item.operand()->operand_index()) + return operand()->operand_index() < item.operand()->operand_index(); + + return ToString() < item.ToString(); + } + bool operator>(const Reference &item) const { return item < *this; } + + void SetConstantPhase() { phase_value_ = phase_expr_->ConstantPhase(); } + bool IsProtected() const { return phase_expr_->IsProtected(); } + bool IsUnprotected() const { return phase_expr_->IsUnprotected(); } + bool IsDuplicate() const { return is_duplicate_; } + void SetDuplicate() { is_duplicate_ = true; } + +private: + IdList *const predicates_ = nullptr; // list of predicates for rule + ConditionalRef *conditional_ref_ = nullptr; // if/then/else reference + RefType ref_type_ = RefTypes::kNull; // type of reference + PhaseExpr *phase_expr_ = nullptr; // pipeline phase of reference + int32_t phase_value_ = -1; // phase if expression is const + int use_cycles_ = 1; // # cycles resource is used + int repeat_ = 1; // default repeat count + int delay_ = 1; // default repeat delay cycles + int micro_ops_ = 0; // Fus entry micro ops + RefFlags::Item fu_flags_ = RefFlags::kNone; // Fus reference attributes + OperandRef *const operand_ = nullptr; // operand we are referencing + ResourceRefList *const resources_ = nullptr; // resources we are referencing + ResourceDef *port_ = nullptr; // port we are referencing + Reference *base_ = nullptr; // base ref for copied objects + bool used_ = false; // was this reference ever used? + bool seen_ = false; // ever considered for a SU? + bool is_duplicate_ = false; // duplicate resource reference? +}; + +//--------------------------------------------------------------------------- +// Describe a conditional reference, corresponding to an if/then/else +// latency statement. +//--------------------------------------------------------------------------- +class ConditionalRef : public MdlItem { +public: + ConditionalRef(MdlItem &item, Identifier *predicate, ReferenceList *refs, + ConditionalRef *else_clause) + : MdlItem(item), predicate_(predicate), refs_(*refs), + else_clause_(else_clause) {} + // This constructor is used to copy conditional references when we + // instantiate a latency reference. 
+ explicit ConditionalRef(ConditionalRef *item, ConditionalRef *else_clause) + : MdlItem(*item), predicate_(item->predicate_), + instr_predicate_(item->instr_predicate_), else_clause_(else_clause) {} + + Identifier *predicate() const { return predicate_; } + PredExpr *instr_predicate() const { return instr_predicate_; } + void SetInstrPredicate(PredExpr *pred) { instr_predicate_ = pred; } + + ReferenceList &refs() { return refs_; } + ConditionalRef *else_clause() const { return else_clause_; } + + // Return true if this ConditionalRef instance has a single operand reference. + bool IsSingleRef() const { + if (refs_.empty()) + return false; + if (refs_[0]->use_cycles() != 1) + return false; // overly strict, probably. + // We can't handle nested ifs, nor non-defs/uses. + if (refs_[0]->IsConditionalRef() || + (!refs_[0]->IsDef() && !refs_[0]->IsUse())) + return false; + // Everything else in the reference list must be an fus. + for (unsigned id = 1; id < refs_.size(); id++) + if (!refs_[id]->IsFuncUnitRef()) + return false; + return true; + } + + // Return true if all ConditionalRefs in this object reference same operand. + bool IsSingleOperand() const { + if (!IsSingleRef()) + return false; + if (auto *operand = refs_[0]->operand()) + return IsSingleOperand(operand->name(), refs_[0]->ref_type()); + return false; + } + bool IsSingleOperand(const std::string &name, RefType ref_type) const { + if (!IsSingleRef()) + return false; + if (ref_type != refs_[0]->ref_type()) + return false; + if (auto *operand = refs_[0]->operand()) { + if (operand->name() != name) + return false; + if (!else_clause()) + return true; + return else_clause()->IsSingleOperand(name, ref_type); + } + return false; + } + // Return true if a conditional reference contains any operand refs. + bool HasOperandRefs() { + if (HasOperandRefs(refs_)) + return true; + if (else_clause_ != nullptr) + return else_clause_->HasOperandRefs(); + return false; + } + // Return true if a set of references contains any operand refs. + bool HasOperandRefs(ReferenceList &refs) { + for (auto *ref : refs_) { + if (ref->IsConditionalRef() && ref->conditional_ref()->HasOperandRefs()) + return true; + if (ref->operand() != nullptr) + return true; + } + return false; + } + + // Return true if a conditional reference contains any resource refs. + bool HasResourceRefs() { + if (HasResourceRefs(refs_)) + return true; + if (else_clause_ != nullptr) + return else_clause_->HasResourceRefs(); + return false; + } + + // Return true if a set of references contains any operand refs. + bool HasResourceRefs(ReferenceList &refs) { + for (auto *ref : refs_) { + if (ref->IsFuncUnitRef()) + return true; + if (ref->IsConditionalRef() && ref->conditional_ref()->HasResourceRefs()) + return true; + if (!ref->resources()->empty()) + return true; + } + return false; + } + + std::string ToString(bool is_else); + +private: + Identifier *predicate_ = nullptr; // Named predicate + PredExpr *instr_predicate_ = nullptr; // predicate expression + ReferenceList refs_; // list of conditional refs + ConditionalRef *else_clause_ = nullptr; // else clause of if stmt +}; + +//--------------------------------------------------------------------------- +// Describe a container for instruction information needed by the database +// generation code. For each instruction, we need: +// - The instruction name - the name used in the target compiler backend. +// - For each operand (in the order defined in the compiler backend) +// - The operand type name (immediate, register class, etc). 
+// - The operand name. +// - The subunit name as defined in the target's mdl file. +// - (Future) What processors it's valid for. +// +// The instruction name must match the symbolic name used for the +// instruction in the target compiler backend. For LLVM, this would +// correspond to the tablegen-generated emumerated name for each +// instruction. This is used by the mdl compiler to connect generated +// information with target instruction ids in the back-end. +// +// The order of operands must match the order of operands as defined by +// the compiler back-end - the MDL compiler generates code that can +// access these operands by index. +// +// The operand names are arbitrary, but ideally these are generated from +// the actual backend description file (such as td files for LLVM). The +// mdl latency rules use these names to refer to specific operands. +// +// The operand type names should also match the target compiler +// instruction descriptions. For registers, it should match a defined +// register class. Others are some kind of constant value. +//---------------------------------------------------------------------------- + +//---------------------------------------------------------------------------- +// Description of a single operand declaration. +//---------------------------------------------------------------------------- +class OperandDecl : public MdlItem { +public: + OperandDecl(const MdlItem &item, Identifier *type, Identifier *name, + bool ellipsis, bool input, bool output) + : MdlItem(item), types_(new IdList({type})), names_(new IdList({name})), + ellipsis_(ellipsis), input_(input), output_(output) {} + + OperandDecl(OperandDecl *item, OperandDecl *parent) + : MdlItem(), + types_(new IdList((*parent->types_).begin(), (*parent->types_).end())), + names_(new IdList((*parent->names_).begin(), (*parent->names_).end())), + is_implied_register_(item->is_implied_register_), + ellipsis_(parent->ellipsis_), input_(parent->input_), + output_(parent->output_), operand_(item->operand_), + reg_class_(item->reg_class_) {} + + // Set this to true if we want to see more detail (for debugging). + const bool print_fully_qualified_declaration_ = true; + std::string ToString() const; + + std::string const &name() const { return (*names_)[0]->name(); } + std::string const type_name() const { return (*types_)[0]->name(); } + Identifier *type() const { return (*types_)[0]; } + IdList *types() const { return types_; } + Identifier *base_type() const { return types_->back(); } + Identifier *op_name() const { return (*names_)[0]; } + IdList *op_names() const { return names_; } + void add_type(Identifier *type) { types_->push_back(type); } + void add_name(Identifier *name) { names_->push_back(name); } + bool is_implied_register() const { return is_implied_register_; } + bool is_ellipsis() const { return ellipsis_; } + bool is_input() const { return input_; } + bool is_output() const { return output_; } + + OperandDef *operand() const { return operand_; } + RegisterClass *reg_class() const { return reg_class_; } + void set_operand(OperandDef *operand) { operand_ = operand; } + void set_regclass(RegisterClass *reg_class) { reg_class_ = reg_class; } + void set_is_implied_register() { is_implied_register_ = true; } + +private: + IdList *types_ = nullptr; // type(s) of operand + IdList *names_ = nullptr; // name(s) of operand + bool is_implied_register_ = false; // is this operand an implied register? + bool ellipsis_ = false; // was this operand an ellipsis? 
+ bool input_ = false; // was the operand tagged as an input? + bool output_ = false; // was the operand tagged as an output? + + OperandDef *operand_ = nullptr; // pointer to associated operand type + RegisterClass *reg_class_ = nullptr; // pointer to associated register class +}; + +//---------------------------------------------------------------------------- +// Description of a single operand definition. +//---------------------------------------------------------------------------- +class OperandDef : public MdlItem { +public: + OperandDef(const MdlItem &item, Identifier *name, OperandDeclList *operands, + Identifier *type, OperandAttributeList *attributes, IdList *bases) + : MdlItem(item), name_(name), bases_(bases), operands_(operands), + type_(type), attributes_(attributes), + base_operands_(new OperandDefList) {} + + std::string ToString() const; + std::string const &name() const { return name_->name(); } + OperandDeclList *operands() const { return operands_; } + Identifier const *type() const { return type_; } + IdList *bases() const { return bases_; } + OperandDefList *base_operands() const { return base_operands_; } + void add_base_operand(OperandDef *base) { base_operands_->push_back(base); } + OperandAttributeList *attributes() const { return attributes_; } + bool IsDerivedOperand() const { return bases_ != nullptr; } + +private: + Identifier *name_ = nullptr; // name of the operand + IdList *bases_ = nullptr; // base operands (for derived) + OperandDeclList *operands_ = nullptr; // list of operand declarations + Identifier *type_ = nullptr; // type of operand + OperandAttributeList *attributes_ = nullptr; // attributes defined + OperandDefList *base_operands_ = nullptr; // base, if this opnd has one +}; + +//---------------------------------------------------------------------------- +// Description of a single operand attribute definition. +// Currently, attributes must have integer values. We could extend this if +// necessary. +//---------------------------------------------------------------------------- +class OperandAttribute : public MdlItem { +public: + OperandAttribute(MdlItem &item, Identifier *name, std::vector *values, + std::string type, PredValueList *predicate_values, + IdList *predicate) + : MdlItem(item), name_(name), values_(values), type_(type), + predicate_values_(predicate_values), predicate_(predicate) {} + + std::string ToString() const; + std::string const &name() const { return name_->name(); } + std::vector *values() const { return values_; } + int values(int i) const { return (*values_)[i]; } + std::string type() const { return type_; } + IdList *predicate() const { return predicate_; } + PredValueList *predicate_values() const { return predicate_values_; } + +private: + Identifier *name_ = nullptr; // name of attribute + std::vector *values_; // integer values of attribute + std::string type_; // type of operand value + PredValueList *predicate_values_ = nullptr; // predicate values (if any) + IdList *predicate_ = nullptr; // attribute predicate +}; + +//---------------------------------------------------------------------------- +// Description of a single predicated value for operand attributes. A value +// is an integer, a range of values, or a mask. 
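+// A single value is stored as a degenerate range (low_ == high_), a range as
+// [low_, high_], and a mask in mask_.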
+//----------------------------------------------------------------------------
+class PredValue : public MdlItem {
+public:
+  enum PredValueType { kValue, kRange, kMask };
+
+  PredValue(MdlItem &item, uint64_t mask)
+      : MdlItem(item), type_(kMask), mask_(mask) {}
+  PredValue(MdlItem &item, int64_t low, int64_t high)
+      : MdlItem(item), type_(kRange), low_(low), high_(high) {
+    if (low == high)
+      type_ = kValue;
+  }
+
+  std::string ToString() const;
+  bool IsRange() const { return type_ == kRange; }
+  bool IsValue() const { return type_ == kValue; }
+  bool IsMask() const { return type_ == kMask; }
+
+  // Pretty print a predicate value.
+  std::string FormatValue(int64_t value) const;
+
+  int64_t value() const { return low_; }
+  int64_t low() const { return low_; }
+  int64_t high() const { return high_; }
+  uint64_t mask() const { return mask_; }
+
+private:
+  PredValueType type_;         // is this a value, range, or mask
+  uint64_t mask_ = 0;          // mask bits
+  int64_t low_ = 0, high_ = 0; // range of values (or value if same)
+};
+
+//----------------------------------------------------------------------------
+// Description of a single instruction.
+//----------------------------------------------------------------------------
+class InstructionDef : public MdlItem {
+public:
+  InstructionDef(const MdlItem &item, Identifier *name,
+                 OperandDeclList *operands, IdList *subunits, IdList *derived)
+      : MdlItem(item), name_(name), operands_(operands),
+        subunits_(subunits ? subunits : new IdList), derived_(derived) {}
+
+  std::string ToString() const;
+  std::string const &name() const { return name_->name(); }
+  IdList *subunits() const { return subunits_; }
+  OperandDeclList *operands() const { return operands_; }
+  OperandDeclList *flat_operands() const { return flat_operands_; }
+  IdList *derived() const { return derived_; }
+  void set_flat_operands(OperandDeclList *opnds) { flat_operands_ = opnds; }
+  bool has_ellipsis() const {
+    return !operands()->empty() && operands()->back()->is_ellipsis();
+  }
+  int num_operands() const { return operands()->size(); }
+  int num_flat_operands() const { return flat_operands()->size(); }
+
+  // Get the operand declaration of the nth operand.
+  // Note: variable arguments never have declared types.
+  OperandDecl *GetOperandDecl(unsigned index) const {
+    if (index >= flat_operands_->size())
+      return nullptr;
+    return (*flat_operands_)[index];
+  }
+
+  // Get the operand type of the nth operand.
+  OperandDef *GetOperandType(int index) const {
+    return GetOperandDecl(index)->operand();
+  }
+  void add_subunit(SubUnitTemplate *subunit) {
+    if (FindItem(*subunits_, subunit->name()))
+      return;
+    subunits_->push_back(new Identifier(subunit->name()));
+  }
+
+private:
+  Identifier *name_ = nullptr;               // name of the instruction
+  OperandDeclList *operands_ = nullptr;      // list of operand declarations
+  OperandDeclList *flat_operands_ = nullptr; // flattened operand declarations
+  IdList *subunits_ = nullptr;               // subunits associated with instruction
+  IdList *derived_ = nullptr;                // instructions derived from this one
+};
+
+//----------------------------------------------------------------------------
+// Capture a single functional unit instantiation and the context of how
+// it was instantiated (CPU, Cluster, Parent FU).
+//----------------------------------------------------------------------------
+class FuncUnitInstantiation {
+public:
+  // This constructor is for instantiating top-level functional units.
+  FuncUnitInstantiation(MdlSpec *spec, CpuInstance *cpu,
+                        ClusterInstance *cluster, FuncUnitInstance *instance)
+      : spec_(spec), cpu_(cpu), cluster_(cluster), instance_(instance),
+        func_type_(instance->get_template()) {
+    InstantiateLocalDefs();
+  }
+
+  // This constructor is for instantiating base functional units, which
+  // reuse ports, resources, and the instance of the parent functional unit.
+  FuncUnitInstantiation(FuncUnitInstantiation *fu, FuncUnitTemplate *base)
+      : spec_(fu->spec()), cpu_(fu->cpu()), cluster_(fu->cluster()),
+        instance_(fu->instance()),
+        func_type_(base), // Note - not instance->get_template()!!
+        resource_args_(fu->resource_args()), class_args_(fu->class_args()),
+        parent_(fu) {
+    InstantiateLocalDefs();
+  }
+
+  // Error check a merged resource reference that has an allocation.
+  ResourceRef *CheckAllocation(ResourceRef *def, ResourceRef *ref);
+  // Create a merged resource reference from a definition and a reference.
+  ResourceRef *MergeRefs(ResourceRef *def, ResourceRef *ref);
+  // Return the template for this instantiation.
+  FuncUnitTemplate *get_template() { return func_type_; }
+  // Create definition objects for locally defined references and ports.
+  void InstantiateLocalDefs();
+  // Look up a register class in the template's parameter list.
+  RegisterClass *FindRegClass(Identifier *item);
+  // Bind a functional unit instantiation parameter to a register class.
+  void BindClassArg(ResourceRef *arg);
+  // Bind a functional unit instantiation parameter to a resource reference.
+  void BindResourceArg(ResourceRef *arg);
+  // Map a functional unit instantiation parameter id to its bound resource.
+  ResourceRef *GetResourceArg(int param_id);
+  // Map a functional unit instantiation parameter id to its bound class.
+  RegisterClassRef *GetClassArg(int param_id);
+  // Determine if a predicate matches the instantiation context's cpu name or
+  // functional unit name. Return true if it's valid.
+  bool ValidPredicate(IdList *predicates) const;
+  // For each subunit instance in a functional unit instantiation, create a
+  // subunit instantiation, bind its instance parameters, and instantiate
+  // all of its latency instances.
+  void InstantiateSubunits();
+  // For each connect statement, find the connected resources and register
+  // classes, and annotate the associated port.
+  void ProcessConnects();
+  // Bind a subunit port argument to its definition.
+  // Return the definition if found, otherwise return nullptr.
+  ResourceDef *BindSubUnitPort(ResourceRef *arg);
+  // Bind a subunit resource argument to its definition.
+  // Return the definition if found, otherwise return nullptr.
+  ResourceRef *BindSubUnitResource(ResourceRef *arg);
+  // Bind a functional unit instance resource argument to its definition.
+  // Return the definition if found, otherwise return nullptr.
+  ResourceRef *BindFuncUnitResource(ResourceRef *arg);
+  // Bind a functional unit instance register class argument to its definition.
+  // Return the definition if found, otherwise return nullptr.
+  RegisterClassRef *BindFuncUnitClass(ResourceRef *arg);
+
+  // Bind functional unit instantiation parameters to resources and classes.
+  void BindFuncUnitParameters();
+  // Bind any slot pinning resources.
+  void BindFuncUnitSlotResources();
+  // Bind subunit instantiation parameters to ports and resources.
+  void BindSubUnitParameters(SubUnitInstantiation *su);
+
+  // Error logging - forward error messages to MdlSpec logger.
+  template <typename... Ts>
+  bool ErrorLog(const MdlItem *item, const char *fmt, Ts... params) const;
+  template <typename... Ts>
+  void WarningLog(const MdlItem *item, const char *fmt, Ts... params) const;
+  int ErrorsSeen() const;
+
+  // Debug - dump a functional unit instantiation.
+  void DumpFuncUnitInstantiation();
+
+  std::string const &name() const { return instance_->name(); }
+  MdlSpec *spec() const { return spec_; }
+  CpuInstance *cpu() const { return cpu_; }
+  ClusterInstance *cluster() const { return cluster_; }
+  FuncUnitInstance *instance() const { return instance_; }
+  FuncUnitTemplate *func_type() const { return func_type_; }
+  ResourceRefDict &resource_args() { return resource_args_; }
+  RegisterClassRefDict &class_args() { return class_args_; }
+  ResourceDefList &resources() { return resources_; }
+  ResourceDefList &ports() { return ports_; }
+  FuncUnitInstantiation *parent() const { return parent_; }
+
+  // Create an implicit resource for this instance.
+  void set_resource() {
+    fu_resource_ = new ResourceDef(instance_->id());
+    fu_resource_->set_unreserved(instance_->is_unreserved());
+    fu_resource_->set_buffered(instance_->is_buffered());
+  }
+  ResourceDef *get_resource() const { return fu_resource_; }
+  ResourceDef *get_root_resource() const {
+    auto *item = this;
+    for (; item->parent_ != nullptr; item = item->parent_)
+      ;
+    return item->fu_resource_;
+  }
+  // Get this instance's implied resource, and all of its parents' resources.
+  ResourceDefList get_resources() const {
+    ResourceDefList resources;
+    resources.push_back(fu_resource_);
+    for (auto *parent = parent_; parent; parent = parent->parent_)
+      resources.push_back(parent->fu_resource_);
+    return resources;
+  }
+
+private:
+  MdlSpec *spec_;                   // pointer to entire file description
+  CpuInstance *cpu_;                // the parent CPU
+  ClusterInstance *cluster_;        // the parent cluster
+  FuncUnitInstance *instance_;      // the functional unit instance
+  FuncUnitTemplate *func_type_;     // the functional unit type, or base
+  ResourceRefDict resource_args_;   // resource arguments to instance
+  RegisterClassRefDict class_args_; // register class arguments to instance
+  ResourceDefList resources_;       // resources defined for this instance
+  ResourceDefList ports_;           // ports defined for this instance
+  ResourceDef *fu_resource_ = nullptr;      // implicit resource for this FU
+  FuncUnitInstantiation *parent_ = nullptr; // Parent functional unit
+};
+
+//----------------------------------------------------------------------------
+// Capture a single subunit instantiation and the context of how it was
+// instantiated.
+//----------------------------------------------------------------------------
+class SubUnitInstantiation {
+public:
+  SubUnitInstantiation(FuncUnitInstantiation *func, SubUnitInstance *subunit)
+      : spec_(func->spec()), func_unit_(func), subunit_(subunit) {
+    su_template_ = subunit->get_template();
+  }
+
+  // Return the implicit functional unit resources associated with this instance.
+  ResourceDefList GetFuncUnitResources() const {
+    return func_unit()->get_resources();
+  }
+
+  // Return slot resources associated with this subunit.
+  ResourceRefList *GetSlotResourcesAny() const {
+    return func_unit()->instance()->get_resource_slots_any();
+  }
+  ResourceRefList *GetSlotResourcesAll() const {
+    return func_unit()->instance()->get_resource_slots_all();
+  }
+  // Bind a port definition to the associated instantiation parameter.
+  void BindPortArg(ResourceRef *arg);
+  // Bind a resource definition to the associated instantiation parameter.
+  void BindResourceArg(ResourceRef *arg);
+  // Map a subunit instantiation parameter id to its bound resource.
+  ResourceRef *GetResourceArg(int param_id);
+  // Map a subunit instantiation parameter id to its bound port.
+  ResourceDef *GetPortArg(int param_id);
+
+  // Determine if a latency predicate matches the instantiation context's
+  // cpu name or functional unit name.
+  bool ValidPredicate(IdList *predicates) const;
+  // Bind a latency instance port argument to its definition.
+  // Return the definition if found, otherwise return nullptr.
+  ResourceDef *BindLatPort(ResourceRef *arg);
+  // Bind a latency resource argument to its definition.
+  // Return the definition if found, otherwise return nullptr.
+  ResourceRef *BindLatResource(ResourceRef *arg);
+  // Bind latency instantiation parameters to ports and resources.
+  void BindLatencyParams(LatencyInstantiation *lat);
+  // Bind latency reference resources to latency template parameters.
+  void BindLatencyResources(LatencyInstantiation &lat, Reference *reference,
+                            ResourceRefList *resources);
+  ConditionalRef *CopyLatencyCondReference(LatencyInstantiation &lat,
+                                           ConditionalRef *cond);
+  void CopyLatencyReference(LatencyInstantiation &lat,
+                            ReferenceList &references, Reference *ref);
+  // Add references from a single latency template to a subunit instantiation.
+  void InstantiateLatency(LatencyInstantiation &lat,
+                          LatencyTemplate *lat_template);
+  // Add references from a parent latency to a subunit instantiation, then
+  // add all of its bases, recursively.
+  void InstantiateLatencyBases(LatencyInstantiation &lat,
+                               LatencyTemplate *parent, LatencyList &bases);
+  // Instantiate all the latencies (and latency bases) associated with
+  // a subunit instantiation.
+  void InstantiateLatencies();
+
+  // Error logging - forward error messages to MdlSpec logger.
+  template <typename... Ts>
+  bool ErrorLog(const MdlItem *item, const char *fmt, Ts... params) const;
+  template <typename... Ts>
+  void WarningLog(const MdlItem *item, const char *fmt, Ts... params) const;
+  int ErrorsSeen() const;
+
+  // Debug: dump this subunit instantiation.
+  void DumpSubUnitInstantiation();
+
+  MdlSpec *spec() const { return spec_; }
+  CpuInstance *cpu() const { return func_unit_->cpu(); }
+  FuncUnitInstantiation *func_unit() const { return func_unit_; }
+  SubUnitInstance *subunit() const { return subunit_; }
+  SubUnitTemplate *su_template() const { return su_template_; }
+  ResourceRefDict &resource_args() { return resource_args_; }
+  ResourceDefDict &port_args() { return port_args_; }
+  ReferenceList &references() { return references_; }
+
+private:
+  MdlSpec *spec_;                    // pointer to entire file description
+  FuncUnitInstantiation *func_unit_; // context of this subunit's instantiation
+  SubUnitInstance *subunit_;         // the subunit instance
+  SubUnitTemplate *su_template_;     // the template for this subunit
+  ResourceRefDict resource_args_;    // resource arguments to this instance
+  ResourceDefDict port_args_;        // port arguments to this instance
+  ReferenceList references_;         // instantiated list of references
+};
+
+//----------------------------------------------------------------------------
+// Capture a single latency instantiation and the context of how it was
+// instantiated.
+//---------------------------------------------------------------------------- +class LatencyInstantiation { +public: + LatencyInstantiation(SubUnitInstantiation *su, LatencyInstance *latency) + : subunit_(su), latency_(latency) { + lat_template_ = latency_->get_template(); + } + + // Bind a resource definition to the associated instantiation parameter. + void BindResourceArg(ResourceRef *arg); + // Bind a port definition to the associated instantiation parameter. + void BindPortArg(ResourceRef *arg); + // Map a latency instantiation parameter to its bound resource. + ResourceRef *GetResourceArg(int param_id); + // Map a latency instantiation parameter to its bound resource. + ResourceDef *GetPortArg(int param_id); + // Debug: dump this latency instantiation. + void DumpLatencyInstantiation(); + + SubUnitInstantiation *subunit() const { return subunit_; } + LatencyInstance *latency() const { return latency_; } + LatencyTemplate *lat_template() const { return lat_template_; } + ResourceRefDict &resource_args() { return resource_args_; } + ResourceDefDict &port_args() { return port_args_; } + +private: + SubUnitInstantiation *subunit_; // context of this instantiation + LatencyInstance *latency_; // latency instance + LatencyTemplate *lat_template_; // template for this latency + ResourceRefDict resource_args_; // resource arguments to this instance + ResourceDefDict port_args_; // port arguments to this instance +}; + +//---------------------------------------------------------------------------- +// Container that captures all the contents of a machine description. +// MdlSpec owns all of these vectors and their contents. +//---------------------------------------------------------------------------- +class MdlSpec { +public: + MdlSpec(bool print_warnings, bool warnings_are_fatal) + : print_warnings_(print_warnings), + warnings_are_fatal_(warnings_are_fatal) { + AddBuiltinPredicates(); + } + + void AddSubUnitInstantiation(SubUnitInstantiation *su) { + su_instantiations_[su->subunit()->name()]->push_back(su); + } + + // Create default subunits to instructions that don't have subunits. + void CheckInstructionSubunits(); + + // Add a subunit instance to a "catchall" functional unit, and add it to the + // specified cpu. + void AddSubunitToCpu(CpuInstance *cpu, SubUnitTemplate *subunit); + + // Scan latency templates to find which functional units they reference, + // then tie each client subunit to any referenced functional units. + std::set FindLatencyFuncUnits(ReferenceList *references); + std::set *FindLatencyFuncUnits(LatencyTemplate *lat); + void FindFunctionalUnitClientCpus(FuncUnitTemplate *funit, CpuInstance *cpu); + void FindFunctionalUnitClientCpus(); + void TieSubUnitsToFunctionalUnits(); + + // Tie a subunit to a set of instructions that match a set of + // regular expressions. + void TieSubUnitToInstructions(SubUnitTemplate *su, StringList *regex_bases); + // Tie a derived subunit to any instruction associated with any of its bases. + void TieDerivedSubUnitsToInstructions(); + + // Check that the input spec has some basic required components. + void CheckInputStructure(); + + // Create a function unit instance object and add to the functional unit + // instance table. + FuncUnitInstantiation *AddFuncUnitInstantiation(CpuInstance *cpu, + ClusterInstance *cluster, + FuncUnitInstance *fu_inst); + + // Create a base function unit instance object and add to table. 
+ FuncUnitInstantiation * + AddFuncUnitBaseInstantiation(FuncUnitInstantiation *parent, + FuncUnitTemplate *base); + + // Create dictionaries for functional units, subunits, and latencies. + // We don't care here about duplicate names (checked separately). + // Also build instance tables for functional units and subunits. + void BuildDictionaries(); + void FindImplicitFuncUnitTemplates(); + void FindValidPredicateNames(); + void IsValidPredicateName(const Identifier *name); + + // First-round semantic checking of the input machine description. + void SameParams(const ParamsList *params, const ParamsList *base_params, + MdlItem *item); + void ValidateArgs(const ParamsList *params, const ResourceRefList *instance, + MdlItem *item); + void CheckForDuplicateDefs(); + void CheckTemplateBases(); + + bool ExpandGroup(FuncUnitGroup *group, IdList *members, unsigned depth); + void CheckInstantiations(); + void CheckIssueSlots(); + void CheckInstructions(); + void CheckOperand(OperandDecl *operand_decl); + bool CheckRecursiveOperands(OperandDef *opnd, OperandDefList &seen); + void CheckOperandDerivations(OperandDef *opnd); + void CheckOperands(); + void CheckConditionalReferences(ConditionalRef *cond_ref); + void CheckReferences(); + void CheckReferenceUse(); // Look for unused references. + void CheckSubunitUse(); // Look for unused subunits. + void CheckResourceDef(const ResourceDef *def); // Check a single resource. + void CheckResourceDefs(); // Make sure shared pools are properly declared. + void CheckResourceUse(); // Look for suspect resource use. + + // Add global resource definitions to each CPU. + void PromoteGlobalResources(); + // Scan resource definitions for CPUs, Clusters, and Functional Unit + // Templates and promote any group member to a general resource. + void PromoteResourceGroupMembers(ResourceDefList *resources, + ResourceDefList *outer_scope, + ResourceRefDict *args); + void PromoteFuncUnitGroupAregs(ClusterInstance *cluster); + void PromoteResourceGroups(); + void CheckPromotedMember(ResourceDef *group, Identifier *member, + ResourceDef *promoted); + + void FlattenOperand(OperandDecl *opnd, OperandDeclList *flat_ops); + void FlattenInstructionOperands(); + void CheckPhaseDefinitions(PipeDefList *pipes); + bool SpecializePhaseExpr(PhaseExpr *expr, CpuInstance *cpu); + void CheckReferencePhases(ReferenceList *refs); + void CheckPipeReferences(); + void CheckPipeReference(ResourceDef *def, CpuInstance *cpu); + bool CheckSubOperands(OperandRef *ref, const Identifier *opnd, int idx); + PhaseName *SearchPipeReference(Identifier *phase, CpuInstance *cpu); + PhaseName *FindPipeReference(Identifier *phase, CpuInstance *cpu); + + // Return the first phase of the first pipeline definition. + PhaseName *FindFirstPhase(); + // Return the first phase identified as an "execute" phase. + PhaseName *FindFirstExecutePhase(CpuInstance *cpu); + + // Instantiate base functional unit instances. + void AddFunctionalUnitBases(FuncUnitInstantiation *parent); + // Instantiate a single functional unit instance. + void InstantiateFunctionalUnit(CpuInstance *cpu, ClusterInstance *cluster, + FuncUnitInstance *fu); + // Iterate over the spec and instantiate every functional unit instance. + void InstantiateFunctionalUnits(); + // For every CPU, build a map of instances for each functional unit template. + void BuildFuncUnitInstancesMap(); + // Assign ids to every resource. + void AssignResourceIds(); + // Assign ids to each pooled resource. 
+ void AssignPoolIds(); + // Debug: Dump every resource id and its context. + void DumpResourceIds(); + + // Print out the entire specification. + std::string ToString() const; + // Debug: dump out all subunit instantiations. + void DumpSubUnitInstantiations(); + // Debug: dump out all functional unit instantiations. + void DumpFuncUnitInstantiations(); + // Debug: dump out all user-defined predicates. + void DumpPredicates(); + + void AddBuiltinPredicates() { + std::string ktrue = "TruePred", kfalse = "FalsePred"; + EnterPredicate(ktrue, new PredExpr(PredOp::kTrue)); + EnterPredicate(kfalse, new PredExpr(PredOp::kFalse)); + } + +public: + // Template function to check for duplicate entries in two symbol definition + // lists. Print an error message for each duplicate found. + template + void FindDuplicates(const std::vector &a, const std::vector &b) { + for (auto *a_item : a) + for (auto *b_item : b) + if (a_item->name() == b_item->name()) + ErrorLog( + a_item, + "Duplicate definition of {0}\n Previously defined at {1}", + a_item->name(), b_item->Location()); + } + + // Template function to check a vector of definitions to make sure + // each name is unique. Print an error message for each duplicate found. + // Don't bother checking empty names associated with implied operands. + template void FindDuplicates(const std::vector &items) { + for (unsigned i = 0; i < items.size(); i++) + for (unsigned j = i + 1; j < items.size(); j++) + if (!items[i]->name().empty() && items[i]->name() == items[j]->name()) { + ErrorLog( + items[j], + "Duplicate definition of {0}\n Previously defined at {1}", + items[j]->name(), items[i]->Location()); + break; // We only need to find the first duplicate for each item. + } + } + + // Check the member list of a resource definition for duplicate members. + void FindDuplicateMembers(ResourceDefList &items) { + for (auto *item : items) + if (item->IsGroupDef()) + FindDuplicates(item->members()); + } + + // Methods for looking up and matching operands in instructions. + int GetOperandIndex(const InstructionDef *instr, const OperandRef *operand, + RefType ref_type); + bool CompareOpndNames(const OperandDecl *opnd, const IdList &names); + int FindOperandName(const InstructionDef *instruct, const IdList &names, + RefType type); + int FindOperand(const InstructionDef *instr, const IdList &name, + const std::string &type, RefType ref_type); + bool FindOperandDerivation(const OperandDef *derived, + const OperandDef *operand) const; + + // Accessors (all return references, so can't be const). 
+ PipeDefList &pipe_phases() { return pipe_phases_; } + ResourceDefList &resources() { return resources_; } + RegisterDefList ®isters() { return registers_; } + RegisterClassList ®_classes() { return reg_classes_; } + CpuList &cpus() { return cpus_; } + FuncUnitList &func_units() { return func_units_; } + FuncUnitGroupList &func_unit_groups() { return func_unit_groups_; } + SubUnitList &subunits() { return subunits_; } + LatencyList &latencies() { return latencies_; } + InstructionList &instructions() { return instructions_; } + OperandDefList &operands() { return operands_; } + + FuncUnitDict &fu_map() { return fu_map_; } + FuncUnitGroupDict &fu_group_map() { return fu_group_map_; } + SubUnitDict &su_map() { return su_map_; } + LatencyDict &lat_map() { return lat_map_; } + OperandDict &operand_map() { return operand_map_; } + InstructionDict &instruction_map() { return instruction_map_; } + RegisterClassDict ®_class_map() { return register_class_map_; } + SubUnitInstantiations &su_instantiations() { return su_instantiations_; } + + bool IsFuncUnitTemplate(const std::string &name) const { + return fu_map_.count(name); + } + bool IsFuncUnitGroup(const std::string &name) const { + return fu_group_map_.count(name); + } + + void set_family_name(const Identifier *name) { + if (!family_name_.empty() && family_name_ != name->name()) + ErrorLog(name, "Incompatible family name specification"); + else + family_name_ = name->name(); + } + std::string family_name() const { return family_name_; } + + // Error and Warning management. + template + bool ErrorLog(const MdlItem *item, const char *fmt, Ts... params); + bool ErrorLog(const MdlItem *item, const std::string &msg) { + WriteMessage(item, msg); + error_count_++; + return true; + } + + template + void WarningLog(const MdlItem *item, const char *fmt, Ts... params); + void WarningLog(const MdlItem *item, const std::string &msg) { + if (print_warnings_ || warnings_are_fatal_) { + std::string prefix = !warnings_are_fatal_ ? "Warning: " : ""; + WriteMessage(item, prefix + msg); + warning_count_++; + } + } + + int ErrorsSeen() const { + return error_count_ + (warnings_are_fatal_ ? warning_count_ : 0); + } + int WarningsSeen() const { return warning_count_; } + + // Error logging: Avoid printing identical error messages. + void WriteMessage(const MdlItem *item, const std::string &msg); + + int PredOperandIndex(const PredExpr *pred, const InstructionDef *instr); + + // Functions to manipulate user-defined predicates. Predicates are defined + // in an instruction-independent manner, yet in the MDL compiler we apply + // each predicate to each associated instruction, so that we can partially + // (and often completely) eliminate the predicate at compiler-build time. 
+  PredExpr *EvaluatePredicate(std::string name, const InstructionDef *instr);
+  PredExpr *EvaluatePredicate(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredSimple(PredExpr *pred, const InstructionDef *instr) {
+    return pred;
+  }
+  PredExpr *PredEvalName(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredCheckAny(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredCheckAll(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredCheckNot(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredCheckOpcode(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredCheckIsReg(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredCheckReg(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredCheckInvalidReg(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredCheckSameReg(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredCheckNumOperand(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredCheckIsImm(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredCheckImm(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredCheckZero(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredOpcodeSwitchStmt(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredOpcodeSwitchCase(PredExpr *pred, const InstructionDef *instr);
+  PredExpr *PredReturnStatement(PredExpr *pred, const InstructionDef *instr);
+
+  // Functions to simplify predicates (this largely implements De Morgan's
+  // laws on predicate expressions).
+  void SimplifyPredicates();
+  PredExpr *PredSimplify(PredExpr *expr);
+
+  // Interfaces to the instruction predicate table.
+  bool IsValidInstructionPredicate(const std::string &name) const {
+    return predicate_table_.count(name);
+  }
+  // Look up a predicate by name, and return the associated predicate.
+  // If the predicate maps to a name, recur on that name.
+  PredExpr *LookupPredicate(PredExpr *pred);
+
+  void EnterPredicate(std::string &name, PredExpr *pred) {
+    if (!IsValidInstructionPredicate(name)) {
+      predicate_table_[name] = pred;
+      return;
+    }
+    if (name == "TruePred" || name == "FalsePred")
+      return;
+    ErrorLog(pred, "Redefinition of predicate: {0}", name);
+  }
+  std::map<std::string, PredExpr *> &predicate_table() {
+    return predicate_table_;
+  }
+
+private:
+  std::string family_name_;             // Family name of processors.
+  PipeDefList pipe_phases_;             // List of pipe specs defined in mdl.
+  ResourceDefList resources_;           // List of resources defined in mdl.
+  RegisterDefList registers_;           // List of registers defined in mdl.
+  RegisterClassList reg_classes_;       // List of register classes defined.
+  CpuList cpus_;                        // List of CPUs defined.
+  FuncUnitList func_units_;             // List of functional unit templates.
+  FuncUnitGroupList func_unit_groups_;  // List of functional unit groups.
+  SubUnitList subunits_;                // List of subunit templates defined.
+  LatencyList latencies_;               // List of latency templates defined.
+  InstructionList instructions_;        // List of instruction definitions.
+  OperandDefList operands_;             // List of operand definitions.
+
+  FuncUnitDict fu_map_;              // Dictionary of functional unit templates.
+  FuncUnitGroupDict fu_group_map_;   // Dictionary of functional unit groups.
+  SubUnitDict su_map_;               // Dictionary of subunit templates.
+  LatencyDict lat_map_;              // Dictionary of latency templates.
+  OperandDict operand_map_;          // Dictionary of operand definitions.
+  InstructionDict instruction_map_;  // Dictionary of instruction definitions.
+  RegisterClassDict register_class_map_;  // Dictionary of register classes.
+  SubUnitInstantiations su_instantiations_;  // Table of all su instances.
+
+  // Set of all names that can be used as mdl predicates.
+  std::unordered_set<std::string> valid_predicate_names_;
+
+  // Dictionary of user-defined predicate expressions, indexed by name.
+  std::map<std::string, PredExpr *> predicate_table_;
+
+  // Cache the first phase name found in the spec.
+  PhaseName *first_phase_name_ = nullptr;  // lowest pipeline phase.
+
+  // Objects to manage error logging.
+  std::unordered_set<std::string> error_messages_;
+  int error_count_ = 0;  // Fatal error count.
+  int warning_count_ = 0;
+  bool print_warnings_ = true;
+  bool warnings_are_fatal_ = false;
+};
+
+//----------------------------------------------------------------------------
+// Error logging template function definitions.
+//----------------------------------------------------------------------------
+template <typename... Ts>
+inline std::string formatv(const char *fmt, Ts &&...vals) {
+  return std::string(llvm::formatv(fmt, vals...));
+}
+
+template <typename... Ts>
+bool SubUnitInstantiation::ErrorLog(const MdlItem *item, const char *fmt,
+                                    Ts... params) const {
+  return spec()->ErrorLog(item, fmt, params...);
+}
+
+template <typename... Ts>
+void SubUnitInstantiation::WarningLog(const MdlItem *item, const char *fmt,
+                                      Ts... params) const {
+  spec()->WarningLog(item, fmt, params...);
+}
+
+template <typename... Ts>
+bool FuncUnitInstantiation::ErrorLog(const MdlItem *item, const char *fmt,
+                                     Ts... params) const {
+  return spec()->ErrorLog(item, fmt, params...);
+}
+template <typename... Ts>
+void FuncUnitInstantiation::WarningLog(const MdlItem *item, const char *fmt,
+                                       Ts... params) const {
+  spec()->WarningLog(item, fmt, params...);
+}
+
+template <typename... Ts>
+bool MdlSpec::ErrorLog(const MdlItem *item, const char *fmt, Ts... params) {
+  return ErrorLog(item, formatv(fmt, params...));
+}
+
+template <typename... Ts>
+void MdlSpec::WarningLog(const MdlItem *item, const char *fmt, Ts... params) {
+  WarningLog(item, formatv(fmt, params...));
+}
+
+//----------------------------------------------------------------------------
+// External definitions.
+//----------------------------------------------------------------------------
+void Abort();
+
+extern ResourceRef *NullResourceRef;
+extern RegisterClass *NullRegisterClass;
+extern ResourceDef *NullPortDef;
+
+bool FindDerivation(OperandDef *ref, const OperandDef *decl,
+                    OperandDefList &opnds);
+OperandAttribute *FindAttribute(const std::string &name, const OperandDef *opnd,
+                                const SubUnitInstantiation *subunit);
+
+} // namespace mdl
+} // namespace mpact
+
+#endif // MDL_COMPILER_MDL_H_
diff --git a/llvm/utils/MdlCompiler/mdl.cpp b/llvm/utils/MdlCompiler/mdl.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/utils/MdlCompiler/mdl.cpp
@@ -0,0 +1,1467 @@
+//===- mdl.cpp - Instantiate mdl template objects -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains methods that implement instantiations of functional units,
+// subunits, and latency templates. The functions in this file implement
+// the first pass of architecture definition: generating a dictionary of
+// specialized subunit instances.
+//
+// General theory of architecture definition and expansion:
+//
+// A top-level architecture description consists of CPU definitions, functional
+// unit template definitions, subunit template definitions, and latency
+// template definitions.
+//
+// CPU definitions are composed of resource definitions, cluster definitions,
+// and specialized functional unit instances. Clusters are also collections of
+// resources and functional units. Each functional unit instance in a CPU
+// (or cluster) definition can be specialized with resource expressions and
+// register class parameters.
+//
+// Functional unit templates, like C++ templates, have parameters which can
+// be specified on each instance of the functional unit, creating specialized
+// instances of the functional unit. They are composed of locally defined
+// resources and specialized subunit instances. Subunits are specialized with
+// resource expressions and defined ports (a type of FU-defined resource).
+//
+// Subunit templates similarly have parameters which can be specified on each
+// instance of the subunit in a functional unit template, creating specialized
+// instances of the subunit in each functional unit instance. A subunit
+// template instantiates one or more latency templates, which are specialized
+// with resources and ports. Subunit templates are associated with each
+// instruction in the machine description, and are how we tie instruction
+// behaviors to CPUs and functional units.
+//
+// Latency templates similarly have parameters which can be specified on each
+// instance of the latency in subunit templates. Latencies are specialized
+// both by the subunits they are instantiated by, and by the instructions they
+// are applied to.
+//
+// This first phase of architecture expansion proceeds as follows:
+//
+// For each CPU definition:
+//   For each functional unit instance (or each FU in a Cluster definition):
+//     Create a specialized functional unit instance for the
+//       (CPU, cluster, functional unit) tuple.
+//     For each subunit instance:
+//       Create the specialized instance.
+//       For each latency instance:
+//         Specialize the latency for the subunit, and add it to the subunit.
+//       Add the subunit instance to a dictionary of subunits.
+//     Instantiate any base functional units.
+//
+// This phase creates a global dictionary of specialized subunit instances,
+// which is used in the "generate" phase to generate latency instances that
+// are specialized per CPU, cluster, functional unit, subunit, and instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "mdl.h"
+#include "llvm/Support/Regex.h"
+
+namespace mpact {
+namespace mdl {
+
+//---------------------------------------------------------------------------
+// Find the first pipeline phase from the first pipeline definition. This
+// is used as a "default" pipe phase identifier for implicit resources.
+// If there are no phase names, make one up to avoid errors downstream.
+//--------------------------------------------------------------------------- +PhaseName *MdlSpec::FindFirstPhase() { + if (first_phase_name_ != nullptr) + return first_phase_name_; + if (pipe_phases().empty() || pipe_phases()[0]->phase_names()->empty()) + return first_phase_name_ = new PhaseName("E1"); + + return first_phase_name_ = (*pipe_phases()[0]->phase_names())[0]; +} + +//--------------------------------------------------------------------------- +// Find the first pipeline phase from the first pipeline definition. This +// is used as a "default" pipe phase identifier for implicit resources. +// First look in the specified CPU, then the top-level spec. If you don't +// find it there, try "E1". If you don't find that, use first phase. +//--------------------------------------------------------------------------- +PhaseName *MdlSpec::FindFirstExecutePhase(CpuInstance *cpu) { + if (cpu != nullptr && !cpu->pipe_phases()->empty()) + for (auto *pipe_def : *cpu->pipe_phases()) + if (pipe_def->first_execute_phase_name() != nullptr) + return pipe_def->first_execute_phase_name(); + + for (auto *pipe_def : pipe_phases()) + if (pipe_def->first_execute_phase_name() != nullptr) + return pipe_def->first_execute_phase_name(); + + auto first = Identifier("E1"); + if (auto *phase = SearchPipeReference(&first, cpu)) + return phase; + return FindFirstPhase(); +} + +//--------------------------------------------------------------------------- +// Conversions between strings and reference types. +//--------------------------------------------------------------------------- +// For parsing the mdl input file. +RefType StringToRefType(const std::string &ref_type) { + static std::map *mapping = + new std::map({{"predicate", RefTypes::kPred}, + {"use", RefTypes::kUse}, + {"def", RefTypes::kDef}, + {"kill", RefTypes::kKill}, + {"usedef", RefTypes::kUseDef}, + {"hold", RefTypes::kHold}, + {"res", RefTypes::kReserve}, + {"fus", RefTypes::kFus}}); + if (mapping->count(ref_type)) + return mapping->at(ref_type); + return RefTypes::kNull; +} + +// Table to convert RefType values to dense set of indexes. +constexpr int RefMap[] = { + 0, 1, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, // 0-15 + 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 + 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32-47 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 48-63 + 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-79 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80-95 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 96-111 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8}; // 112-128 + +// For writing out debug information. +std::string RefTypeToString(RefType ref_type) { + static const char *refs[] = {"null", "predicate", "use", "def", "kill", + "usedef", "hold", "reserve", "fus"}; + if (ref_type < RefTypes::kNull || ref_type > RefTypes::kFus) + return "RefNull"; + return refs[RefMap[static_cast(ref_type)]]; +} + +// For writing out the database. +std::string FormatReferenceType(RefType ref_type) { + static const char *refs[] = {"RefNull", "RefPred", "RefUse", + "RefDef", "RefKill", "RefUseDef", + "RefHold", "RefReserve", "RefFus"}; + if (ref_type < RefTypes::kNull || ref_type > RefTypes::kFus) + return "RefNull"; + return refs[RefMap[static_cast(ref_type)]]; +} + +// For writing out aggregate references (where they are ORed together). 
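+// Each RefTypes value occupies a distinct bit, so an aggregate reference is
+// the bitwise OR of the individual types; for example, a reference that is
+// both a use and a def formats as "<Use Def>".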
+std::string FormatReferenceTypes(int ref_type) {
+  std::string out;
+  if (ref_type & static_cast<int>(RefTypes::kPred))
+    out += " Predicate";
+  if (ref_type & static_cast<int>(RefTypes::kUse))
+    out += " Use";
+  if (ref_type & static_cast<int>(RefTypes::kDef))
+    out += " Def";
+  if (ref_type & static_cast<int>(RefTypes::kKill))
+    out += " Kill";
+  if (ref_type & static_cast<int>(RefTypes::kUseDef))
+    out += " UseDef";
+  if (ref_type & static_cast<int>(RefTypes::kHold))
+    out += " Hold";
+  if (ref_type & static_cast<int>(RefTypes::kReserve))
+    out += " Reserve";
+  if (ref_type & static_cast<int>(RefTypes::kFus))
+    out += " Fus";
+  return formatv("<{0}>", out.substr(1));
+}
+
+//---------------------------------------------------------------------------
+// Create a subpool descriptor for a resource reference.
+//---------------------------------------------------------------------------
+SubPool::SubPool(const ResourceRef *res) {
+  if (res->IsGroupRef()) {
+    first_ = 0;
+    last_ = res->definition()->members().size() - 1;
+  } else {
+    first_ = res->first();
+    last_ = res->last();
+  }
+}
+
+//---------------------------------------------------------------------------
+// For each instruction that has no subunits specified, generate a default
+// subunit that references each of its register operands in the same pipeline
+// phase. Tie the subunit to a "fake" functional unit, and add that
+// functional unit instance to each CPU instance.
+//---------------------------------------------------------------------------
+// NOTE: If an instruction doesn't have any subunits, we won't have any
+// detailed functional unit or latency information for it. The back-end
+// latency management will report "default" latencies, and will not have
+// any scheduling constraints for those instructions. If it's a "real"
+// instruction (vs a pseudo-instruction), that's probably a bad idea. The
+// generation of default latency information is enabled by the
+// "gen_missing_info" command-line flag; we recommend not using it.
+//---------------------------------------------------------------------------
+// NOTE: In theory, different CPUs could have different "first execute"
+// stages, so we really ought to iterate over instructions separately for
+// each CPU. But that's -really- expensive, and it's generally very reasonable
+// to depend on the function finding "E1" in the spec-level phase table.
+//---------------------------------------------------------------------------
+void MdlSpec::CheckInstructionSubunits() {
+  MdlItem item;
+  std::string unit = "$pseudo_unit";
+  int pseudo_subunits = 0;
+
+  for (auto *instr : instructions())
+    if (instr->subunits()->empty()) {
+      auto *refs = new ReferenceList;
+      // Create reference objects for each register-based operand. There are a
+      // few complexities to this. Register class operands can be embedded
+      // in other operands, and we need to properly represent the operand
+      // hierarchy in the reference. Register-specific references can just
+      // reference the register directly.
+      for (const auto *opnd : *instr->flat_operands()) {
+        auto *back = opnd->types()->back();
+        auto *front = opnd->types()->front();
+        OperandRef *ref_opnd = nullptr;
+        if (auto *rclass = FindItem(reg_classes(), back->name())) {
+          ref_opnd = new OperandRef(front, opnd->op_names(), rclass);
+        } else if (FindItem(registers(), back->name()) != nullptr) {
+          ref_opnd = new OperandRef(item, nullptr, new IdList(1, back));
+        }
+        RefType ref_type = opnd->is_input() ?
RefTypes::kUse : RefTypes::kDef; + auto *phase = FindFirstExecutePhase(nullptr); + refs->push_back(new Reference(ref_type, phase, ref_opnd)); + } + // Create an explicit reference to the functional unit. + auto *phase = FindFirstPhase(); + refs->push_back(new Reference(RefTypes::kFus, phase, unit)); + + // We create new templates for the latency and subunit, and new + // instances for both, then add them to the appropriate spec tables. + auto lname = formatv("$latency{0}", pseudo_subunits); + auto sname = formatv("$subunit{0}", pseudo_subunits++); + auto *latency = new LatencyTemplate(lname, refs); + auto *instance = new LatencyInstance(lname); + auto *subunit = new SubUnitTemplate(sname, instance, latency); + + // Add the latency and subunit templates to the global sets of units. + latencies().push_back(latency); + subunits().push_back(subunit); + su_instantiations()[sname] = new std::vector; + + // Add the latency and subunit templates to the dictionaries. + lat_map().emplace(latency->name(), latency); + su_map().emplace(subunit->name(), subunit); + + // Add a subunit instance to the instruction. + instr->subunits()->push_back(new Identifier(sname)); + } + + if (pseudo_subunits == 0) + return; + + // Add an implicitly defined functional unit template to the dictionary. + fu_map_.emplace(unit, new FuncUnitTemplate(new Identifier(unit))); + + // Add the pseudo unit to the first cluster of each cpu instance. + for (auto *cpu : cpus()) + if (!cpu->clusters()->empty()) + cpu->clusters()->front()->func_units()->push_back( + new FuncUnitInstance(unit)); +} + +//--------------------------------------------------------------------------- +// If a subunit referenced a CPU in an fus clause, create a "fake" functional +// unit and add the subunit to that. +//--------------------------------------------------------------------------- +void MdlSpec::AddSubunitToCpu(CpuInstance *cpu, SubUnitTemplate *subunit) { + auto fu_temp_name = formatv("_default_:{0}", cpu->name()); + + // If the CPU doesn't have a catchall functional unit, create a CPU-specific + // functional unit template, and add an instance of it to the CPU. + auto *cluster = (*cpu->clusters())[0]; + auto *fu = FindItem(*cluster->func_units(), fu_temp_name); + if (fu == nullptr) { + auto *fu_template = new FuncUnitTemplate(new Identifier(fu_temp_name)); + fu_map_[fu_temp_name] = fu_template; + cluster->func_units()->push_back(fu = new FuncUnitInstance(fu_temp_name)); + fu->set_template(fu_template); + } + + // If the subunit hasn't been added previously, add it now. + if (!FindItem(*fu->get_template()->subunits(), subunit->name())) { + auto *instance = new SubUnitInstance(*subunit, subunit->type()); + instance->set_template(subunit); + fu->get_template()->add_subunit_instance(instance); + } +} + +//--------------------------------------------------------------------------- +// Scan a reference list looking for functional unit references. +// Create a set of them and return the set. Note: we will accept CPU names +// as well, and handle them properly. We also expand functional unit groups. 
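+// Conditional references are scanned recursively, so fus() clauses nested in
+// if/else arms are collected as well.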
+//--------------------------------------------------------------------------- +std::set MdlSpec::FindLatencyFuncUnits(ReferenceList *references) { + std::set fus; + + for (auto *ref : *references) { + if (ref->IsConditionalRef()) { + ConditionalRef *cond; + for (cond = ref->conditional_ref(); cond; cond = cond->else_clause()) { + std::set cond_fus = FindLatencyFuncUnits(&cond->refs()); + fus.insert(cond_fus.begin(), cond_fus.end()); + } + } else if (ref->ref_type() == RefTypes::kFus) { + for (auto *res : *ref->resources()) { + if (FindItem(fu_map_, res->name()) || FindItem(cpus_, res->name())) { + fus.insert(res->name()); + } else if (auto *group = FindItem(fu_group_map(), res->name())) { + for (auto *fu : *group->members()) + fus.insert(fu->name()); + } else { + ErrorLog(res, "Invalid functional unit specifier: {0}", res->name()); + } + } + } + } + return fus; +} + +//--------------------------------------------------------------------------- +// Find the set of explicitly referenced FUS for each latency template. +//--------------------------------------------------------------------------- +std::set *MdlSpec::FindLatencyFuncUnits(LatencyTemplate *lat) { + if (lat->referenced_fus()) + return lat->referenced_fus(); + + auto *fus = new std::set; + + // Find reference fu sets for base units first. + if (lat->base_ids()) + for (auto *base : *lat->base_ids()) { + auto *base_fus = FindLatencyFuncUnits(lat_map_[base->name()]); + fus->insert(base_fus->begin(), base_fus->end()); + } + + auto fu_refs = FindLatencyFuncUnits(lat->references()); + fus->insert(fu_refs.begin(), fu_refs.end()); + + lat->set_referenced_fus(fus); + return fus; +} + +//--------------------------------------------------------------------------- +// For each functional unit template, enumerate all CPUs that instantiate it, +// including all uses of the unit as a subunit of another template. +//--------------------------------------------------------------------------- +void MdlSpec::FindFunctionalUnitClientCpus(FuncUnitTemplate *funit, + CpuInstance *cpu) { + funit->add_client_cpu(cpu->name()); + for (auto *base : funit->unit_bases()) + FindFunctionalUnitClientCpus(base, cpu); +} + +void MdlSpec::FindFunctionalUnitClientCpus() { + for (auto *cpu : cpus_) + for (auto *cluster : *cpu->clusters()) + for (auto *funit : *cluster->func_units()) + FindFunctionalUnitClientCpus(funit->get_template(), cpu); +} + +//--------------------------------------------------------------------------- +// For each CPU, build a vector of functional unit instances for each used +// functional unit template. This is used when we're writing out fus() +// records. 
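+// Catch-all functional units and '&'-prefixed names are not included in the
+// map.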
+//--------------------------------------------------------------------------- +void MdlSpec::BuildFuncUnitInstancesMap() { + for (auto *cpu : cpus_) + for (auto *cluster : *cpu->clusters()) + for (auto *funit : cluster->fu_instantiations()) { + auto &name = funit->func_type()->name(); + if (!is_catchall_name(name) && name[0] != '&') + cpu->func_unit_instances()[name].push_back(funit); + } + +#if 0 + for (auto *cpu : cpus_) + for (auto &[name, units] : cpu->func_unit_instances()) { + std::cout << formatv("{0} {1}: ", cpu->name(), name); + for (auto *unit : units) + std::cout << formatv("{0} ", unit->name()); + std::cout << "\n"; + } +#endif +} + +//--------------------------------------------------------------------------- +// In a "bottom-up" architecture definition, we don't have explicit template +// definitions for functional units, and we need to tie latency "fus()" +// references to the CPU's that contain instances of the referenced functional +// units. We do that by creating (for each CPU) a CPU-specific "catchall" +// functional unit template (and an instance) that instantiates all of the +// subunits/latencies that reference that CPU's implicitly defined functional +// units. +//--------------------------------------------------------------------------- +void MdlSpec::TieSubUnitsToFunctionalUnits() { + // For each latency template, create a set of functional units it references. + for (auto *lat : latencies_) + FindLatencyFuncUnits(lat); + + // For each functional unit template, find the CPU's that instantiate it. + FindFunctionalUnitClientCpus(); + + for (auto *subunit : subunits_) + for (auto *latency : *subunit->latencies()) + for (auto &funit : *latency->get_template()->referenced_fus()) { + // If the latency references a CPU name, add the subunit to that CPU. + if (auto *cpu = FindItem(cpus_, funit)) { + AddSubunitToCpu(cpu, subunit); + continue; + } + // We only allow explicit functional unit references (fus) to + // reference implicitly defined functional units. Referencing an + // explicitly defined functional unit template is not supported, + // so we issue a warning and ignore the reference. + if (!fu_map_[funit]->is_implicitly_defined()) { + WarningLog(latency, + "Invalid reference to an explicitly defined " + "functional unit \"{0}\"", + funit); + continue; + } + // If its a func_unit, add the subunit to all CPUs that have the + // functional unit as a client. + for (auto &cpu : fu_map_[funit]->client_cpus()) + AddSubunitToCpu(FindItem(cpus_, cpu), subunit); + } +} + +// Helper function for recursively adding derived subunits to instructions. +static void AddDerivedSubUnits(InstructionDef *instruction, + SubUnitTemplate *subunit) { + for (auto *derived_unit : subunit->derived_subunits()) { + instruction->add_subunit(derived_unit); + AddDerivedSubUnits(instruction, derived_unit); + } +} + +// Helper function to determine if a regular expression has a prefix that we +// can search for. Generally, this is anything up to the first metacharacter. +// However, if the expression has a top level | or ? operator, we can't +// define a prefix. 
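+// For example, "ADD(32|64)rr" yields the prefix "ADD" (its '|' is nested in
+// parentheses), while "ADD|SUB" has a top-level '|' and yields no prefix.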
+static std::string GetPrefix(std::string ®ex) { + static const char meta[] = "()^$*+?.[]\\{}"; + auto first_meta = regex.find_first_of(meta); + if (first_meta == std::string::npos) + return regex; + + int param = 0; + for (char ch : regex) { + if (ch == '(') + param++; + else if (ch == ')') + param--; + else if ((ch == '|' || ch == '?') && param == 0) + return ""; + } + + return regex.substr(0, first_meta); +} + +//--------------------------------------------------------------------------- +// Given a list of regular expressions, add the subunit to each matched +// instruction. Following tablegen's format, these aren't *quite* regular +// expressions in that they are always prefix searches - we must match +// the whole instruction name. +//--------------------------------------------------------------------------- +void MdlSpec::TieSubUnitToInstructions(SubUnitTemplate *subunit, + StringList *regex_bases) { + if (regex_bases == nullptr) + return; + + // We can speed the searches where the expression has an alphanumeric prefix, + // by only searching names that begin with that prefix. + for (auto ®ex : *regex_bases) { + auto prefix = GetPrefix(regex); + auto pattern = regex.substr(prefix.size()); + + std::optional rex; + if (!pattern.empty()) { + if (pattern[0] != '^') + pattern = formatv("^({0})", pattern); + rex = llvm::Regex(pattern); + } + + // If we see a prefix, we can narrow the range of instructions searched. + bool match = false; + auto end = instruction_map_.end(); + auto begin = instruction_map_.begin(); + if (!prefix.empty()) + begin = instruction_map_.lower_bound(prefix); + + // If we don't have a prefix, we need to search every single instruction. + if (prefix.empty()) { + for (auto itr = begin; itr != end; ++itr) + if (rex->match(itr->first)) { + itr->second->add_subunit(subunit); + match = true; + } + } else { + // If we have a prefix, only search instructions with that prefix. + for (auto itr = begin; itr != end; ++itr) { + if (itr->first.compare(0, prefix.size(), prefix) != 0) + break; + if (!rex || rex->match(itr->first.substr(prefix.size()))) { + itr->second->add_subunit(subunit); + match = true; + } + } + } + if (!match) + ErrorLog(subunit, "Unmatched base instruction expression \"{0}\"", regex); + } +} + +//--------------------------------------------------------------------------- +// Tie each derived subunit to each instruction that uses any of its base +// subunits (recursively). +//--------------------------------------------------------------------------- +void MdlSpec::TieDerivedSubUnitsToInstructions() { + for (auto *instruction : instructions_) + if (auto *subunits = instruction->subunits()) { + IdList base_subunits = *subunits; // We're going to add to vector... + for (auto *subunit : base_subunits) + AddDerivedSubUnits(instruction, su_map()[subunit->name()]); + } +} + +//--------------------------------------------------------------------------- +// If a merged definition has allocation information, make sure it's correct. +//--------------------------------------------------------------------------- +ResourceRef *FuncUnitInstantiation::CheckAllocation(ResourceRef *def, + ResourceRef *ref) { + int count = ref->pool_count(); + Identifier *count_name = ref->pool_count_name(); + Identifier *value_name = ref->value_name(); + + // Copy any allocation information from the reference to the definition. 
+ def->set_value_name(value_name); + def->set_pool_count_name(count_name); + def->set_pool_count(count); + + // Return if there is no allocation request, or the request is symbolic. + // (We will check symbolic sizes later). + if (!ref->HasCount() || count_name) + return def; + + // Check array references. + // Array allocations must be non-zero and between 1 and the pool size. + if (def->IsArrayDef()) { + if (count == 0 || count > def->pool_size()) { + ErrorLog(ref, "Invalid resource allocation size: {0}", count); + return nullptr; + } + // The pool size must evenly divide the number of entries in the pool. + if (def->pool_size() % count != 0) { + ErrorLog(ref, "Pool count must evenly divide the resource pool size"); + return nullptr; + } + return def; + } + // Allocation for everything else must be 1. + if (count != 1) { + ErrorLog(ref, "Invalid resource allocation size: {0}", count); + return nullptr; + } + return def; +} + +//--------------------------------------------------------------------------- +// Given an incoming resource reference and a possibly-qualified use of +// that resource, check that the qualification makes sense, and produce a +// resultant resource reference in terms of the original resource. +// Some important assumptions about incoming references: +// - Input pool references are represented with an explicit range. +// - Input references should have an associated definition. +// - Input references have already been error checked. +// The following definition/reference combinations are supported: +// 1. name --> name // no change, always legal +// 2. name --> name.member // name is a group & member exists +// 3. name[range] --> name[subrange] // ok if subrange is legal +// 4. name[range] --> name[#] // ok if index is legal +// +// "def" represents a resource definition, which is either an explicit +// definition, or a template parameter bound to a definition. +// "ref" is a possibly-qualified use of that resource in an instantiation +// of a template. This function returns a reference that represents the +// fully qualified reference. +//--------------------------------------------------------------------------- +ResourceRef *FuncUnitInstantiation::MergeRefs(ResourceRef *def, + ResourceRef *ref) { + if (def->IsNull()) + return new ResourceRef(*def); + + // Case 1: if the reference is unqualified, just return the def. + if (ref->IsUnqualifiedRef()) + return CheckAllocation(new ResourceRef(*def), ref); + + // Case 2: look up the member reference, and return a reference to the + // group's promoted resource. + if (def->IsGroupRef() && ref->member()) { + auto *mem = FindItem(def->definition()->members(), ref->member()->name()); + if (mem == nullptr) { + ErrorLog(ref, "Resource member not found: {0}", ref->member()->name()); + return nullptr; + } + auto *member = def->definition()->get_member_def(mem->index()); + return new ResourceRef(member); + } + + // Case 3 and 4: Ensure the subrange is a subset of the def's range. + // Note: All subranges are zero based relative to the original pool def. + // But in general we don't want successive qualifications to make the + // subrange larger. 
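+  // For example, a definition spanning elements 0..7 may be narrowed to 2..5
+  // or indexed at 3, but a reference to 2..9 extends past the definition and
+  // is rejected.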
+ if (def->IsArrayDef() && (ref->IsSubrange() || ref->IsIndexed())) { + if (ref->first() < def->first() || ref->last() > def->last()) { + if (ref->IsIndexed()) + ErrorLog(ref, "Invalid resource pool index: {0}; expected [{1}..{2}]", + ref->first(), def->first(), def->last()); + else + ErrorLog(ref, "Invalid resource pool subrange"); + return nullptr; + } + auto *qualified_ref = new ResourceRef(*def); + qualified_ref->set_subrange(ref->first(), ref->last()); + return CheckAllocation(qualified_ref, ref); + } + + // Member references cannot be further qualified. + if (def->member()) { + ErrorLog(ref, "Invalid member reference qualification"); + return nullptr; + } + // Member references can only be used with grouped resources. + if (ref->member()) { + ErrorLog(ref, "Invalid member reference: {0}", ref->member()->name()); + return nullptr; + } + + // For everything else, check any pool allocations. + if (def->IsGroupDef() || def->IsArrayDef()) + return CheckAllocation(new ResourceRef(*def), ref); + + ErrorLog(ref, "Invalid resource qualifiers"); + return nullptr; +} + +//---------------------------------------------------------------------------- +// Implementations of FuncUnitInstantiation methods. +//---------------------------------------------------------------------------- + +// Create definition objects for each locally defined reference and port. +void FuncUnitInstantiation::InstantiateLocalDefs() { + for (auto *res : *get_template()->resources()) + resources().push_back(new ResourceDef(*res)); + for (auto *port : *get_template()->ports()) + ports().push_back(new ResourceDef(port)); +} + +// Look up a register class in the template's parameter list. +RegisterClass *FuncUnitInstantiation::FindRegClass(Identifier *item) { + if (auto *arg = FindItem(class_args(), item->name())) + return arg->regs(); + return nullptr; +} + +// Bind a functional unit instantiation parameter to a register class. +void FuncUnitInstantiation::BindClassArg(ResourceRef *arg) { + class_args()[arg->get_parameter()->name()] = BindFuncUnitClass(arg); +} + +// Bind a functional unit instantiation parameter to a resource reference. +void FuncUnitInstantiation::BindResourceArg(ResourceRef *arg) { + resource_args()[arg->get_parameter()->name()] = BindFuncUnitResource(arg); +} + +// Map a functional unit instantiation parameter id to its bound class. +RegisterClassRef *FuncUnitInstantiation::GetClassArg(int param_id) { + return class_args()[(*get_template()->params())[param_id]->name()]; +} + +// Map a functional unit instantiation parameter id to its bound resource. +ResourceRef *FuncUnitInstantiation::GetResourceArg(int param_id) { + return resource_args()[(*get_template()->params())[param_id]->name()]; +} + +// Given a predicate for a subunit or latency instance, determine if it +// matches the instantiation context's cpu name, functional unit name, or +// functional unit template type. 
+bool FuncUnitInstantiation::ValidPredicate(IdList *predicates) const { + if (predicates == nullptr) + return true; + for (auto *id : *predicates) { + if (id->name() == cpu()->name() || id->name() == instance()->name() || + id->name() == func_type()->name() || + spec()->IsValidInstructionPredicate(id->name())) + return true; + spec()->IsValidPredicateName(id); + } + return false; +} + +//--------------------------------------------------------------------------- +// For each subunit instance in a functional unit instantiation, create a +// subunit instantiation, bind its instance parameters, and instantiate +// all of its latency instances. +//--------------------------------------------------------------------------- +void FuncUnitInstantiation::InstantiateSubunits() { + for (auto *instance : *get_template()->subunits()) + if (ValidPredicate(instance->predicates())) { + auto *subunit = new SubUnitInstantiation(this, instance); + BindSubUnitParameters(subunit); + subunit->InstantiateLatencies(); + spec()->AddSubUnitInstantiation(subunit); + } +} + +//--------------------------------------------------------------------------- +// Process connect statements. Find the connected resources and register +// classes, do some error checking. +//--------------------------------------------------------------------------- +void FuncUnitInstantiation::ProcessConnects() { + for (auto *connect : *get_template()->connections()) { + // First make sure the referenced port definition exists. + auto *port = FindItem(ports(), connect->name()); + if (port == nullptr) { + ErrorLog(connect, "Port not found: {0}", connect->name()); + return; + } + + // If a register class is specified, find it, either as an incoming + // argument or globally defined. + if (connect->reg_class()) { + auto *name = connect->reg_class(); + auto *reg_class = FindRegClass(name); + if (reg_class == nullptr) + reg_class = FindItem(spec()->reg_classes(), name->name()); + if (reg_class == nullptr) { + ErrorLog(connect, "Register class not found: {0}", name->name()); + return; + } + port->set_reg_class(reg_class); + } + + // If a resource reference was provided, verify it. + if (auto *resource = connect->resource()) { + ResourceRef *ref = nullptr; + if (auto *res = FindItem(resource_args(), resource->name())) { + ResourceRef arg(*res); + ref = MergeRefs(&arg, resource); + } else if (auto *def = FindItem(resources(), resource->name())) { + ResourceRef arg(def); + ref = MergeRefs(&arg, resource); + } + + if (ref == nullptr) + ErrorLog(connect, "Resource not found: {0}", resource->name()); + else + port->set_port_resource(ref); + } + if (ErrorsSeen()) + return; + } +} + +//--------------------------------------------------------------------------- +// Bind a subunit instance port argument to its definition. +// Return the definition if found, if any errors are found return nullptr. +//--------------------------------------------------------------------------- +ResourceDef *FuncUnitInstantiation::BindSubUnitPort(ResourceRef *arg) { + if (arg->IsNull()) + return NullPortDef; + if (auto *port_arg = FindItem(ports(), arg->name())) + return port_arg; + + ErrorLog(arg, "Port argument not found: {0}", arg->name()); + return nullptr; +} + +//--------------------------------------------------------------------------- +// Bind a subunit resource argument to its definition. +// Return the definition if found, if any errors are found return nullptr. 
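+// Instance arguments are searched before the functional unit's locally defined
+// resources, so a bound parameter shadows a local resource of the same name.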
+//--------------------------------------------------------------------------- +ResourceRef *FuncUnitInstantiation::BindSubUnitResource(ResourceRef *arg) { + // If this is a "null" binding, just return null. + if (arg->IsNull()) + return NullResourceRef; + + // Search for the resource definition in arguments and FU-level definitions. + if (auto *resource = FindItem(resource_args(), arg->name())) { + ResourceRef ref(*resource); + return MergeRefs(&ref, arg); + } + if (auto *resource = FindItem(resources(), arg->name())) { + ResourceRef def(resource); + return MergeRefs(&def, arg); + } + + ErrorLog(arg, "Resource argument not found: {0}", arg->name()); + return nullptr; +} + +//--------------------------------------------------------------------------- +// Bind a functional unit instance resource argument to its definition. +// Return the definition if found, if any errors are found return nullptr. +//--------------------------------------------------------------------------- +ResourceRef *FuncUnitInstantiation::BindFuncUnitResource(ResourceRef *arg) { + // If this is a "null" binding, just return null. + if (arg->IsNull()) + return NullResourceRef; + + // Search for resource definition in the cluster and CPU level. + ResourceDef *def; + if ((def = FindItem(*cluster()->resources(), arg->name())) || + (def = FindItem(*cpu()->resources(), arg->name()))) { + ResourceRef ref(def); + return MergeRefs(&ref, arg); + } + + ErrorLog(arg, "Resource argument not found: {0}", arg->name()); + return nullptr; +} + +//--------------------------------------------------------------------------- +// Bind a functional unit instance register class argument to its definition. +// Return the definition if found, if any errors are found return nullptr. +//--------------------------------------------------------------------------- +RegisterClassRef *FuncUnitInstantiation::BindFuncUnitClass(ResourceRef *arg) { + // If this is a "null" binding, just return null. + if (arg->IsNull()) + return new RegisterClassRef(NullRegisterClass); + + // Look up the register class in the global class table. + if (auto *item = FindItem(spec()->reg_classes(), arg->name())) + return new RegisterClassRef(item); + + // If we don't find the class, but find a register definition, create a + // custom class that contains just that single register (a common case). + if (RegisterDef *reg = FindItem(spec()->registers(), arg->name())) { + auto *members = new RegisterDefList; + members->push_back(reg); + std::string newname = "[" + arg->name() + "]"; + return new RegisterClassRef( + new RegisterClass(*arg, new Identifier(newname), members)); + } + + ErrorLog(arg, "Register class argument not found: {0}", arg->name()); + return nullptr; +} + +//--------------------------------------------------------------------------- +// Bind functional unit instantiation parameters to resources and classes. +//--------------------------------------------------------------------------- +void FuncUnitInstantiation::BindFuncUnitParameters() { + auto &instance_args = *instance()->args(); + int num_params = instance_args.size(); + + // Iterate over the parameters, bind the parameters of the instance + // to the objects (register classes or resources) they refer to. 
+ for (int argid = 0; argid < num_params; argid++) { + if (instance_args[argid]->get_parameter()->IsResource()) { + BindResourceArg(instance_args[argid]); + } else { + BindClassArg(instance_args[argid]); + } + } +} + +//--------------------------------------------------------------------------- +// Look up functional unit pinning resources. +//--------------------------------------------------------------------------- +void FuncUnitInstantiation::BindFuncUnitSlotResources() { + IdList *slots_any = instance()->pin_any(); + IdList *slots_all = instance()->pin_all(); + IdList *slots = slots_any ? slots_any : slots_all; + IdList *implicit_slots = nullptr; + + // If the instance wasn't pinned to any slots, and slots have been + // declared for this cpu/cluster, create an "any" set of resources. + if (slots == nullptr && (cluster()->issues() || cpu()->issues())) { + auto *issues = cluster()->issues() ? cluster()->issues() : cpu()->issues(); + slots = slots_any = implicit_slots = new IdList; + for (auto *res : *issues) + slots->push_back(res->id()); + } + + // Find the definition of any pin reference. + auto resource_list = new ResourceRefList; + ResourceDef *res; + for (auto *slot : *slots) { + if ((res = FindItem(*cluster()->issues(), slot->name())) || + (res = FindItem(*cpu()->issues(), slot->name()))) + resource_list->push_back(new ResourceRef(res)); + else + ErrorLog(res, "Issue slot resource not found: {0}", slot->name()); + } + + // Add the slot references to the functional unit instance. + if (slots_any) + instance()->set_resource_slots_any(resource_list); + else + instance()->set_resource_slots_all(resource_list); + + if (implicit_slots != nullptr) + delete implicit_slots; +} + +//--------------------------------------------------------------------------- +// Bind subunit instantiation parameters to ports and resources. +//--------------------------------------------------------------------------- +void FuncUnitInstantiation::BindSubUnitParameters(SubUnitInstantiation *su) { + auto &instance_args = *su->subunit()->args(); + int num_params = instance_args.size(); + + for (int argid = 0; argid < num_params; argid++) + if (instance_args[argid]->get_parameter()->IsResource()) + su->BindResourceArg(instance_args[argid]); + else + su->BindPortArg(instance_args[argid]); +} + +//--------------------------------------------------------------------------- +// Implementation of SubUnitInstantiation methods. +//--------------------------------------------------------------------------- + +// Bind a port definition to the specified subunit instantiation parameter. +void SubUnitInstantiation::BindPortArg(ResourceRef *arg) { + port_args()[arg->get_parameter()->name()] = func_unit()->BindSubUnitPort(arg); +} + +// Bind a resource definition to the specified subunit instantiation parameter. +void SubUnitInstantiation::BindResourceArg(ResourceRef *arg) { + resource_args()[arg->get_parameter()->name()] = + func_unit()->BindSubUnitResource(arg); +} + +// Map a subunit instantiation parameter id to its bound resource. +ResourceRef *SubUnitInstantiation::GetResourceArg(int param_id) { + return resource_args()[(*su_template()->params())[param_id]->name()]; +} + +// Map a subunit instantiation parameter id to its bound port. +ResourceDef *SubUnitInstantiation::GetPortArg(int param_id) { + return port_args()[(*su_template()->params())[param_id]->name()]; +} + +// Given a predicate for a latency instance, determine if it matches the +// instantiation context's cpu name or functional unit name. 
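+// For example (hypothetical names), a subunit body may guard a latency
+// instance with a predicate, as in "cpu_a : latency lat_add(p0);", so that
+// the instance is only created when this subunit is instantiated in the
+// context of a CPU or functional unit named "cpu_a".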
+bool SubUnitInstantiation::ValidPredicate(IdList *predicates) const { + return func_unit()->ValidPredicate(predicates); +} + +//--------------------------------------------------------------------------- +// Bind a latency instance port argument to its definition. +// Return the definition if found, if any errors are found return nullptr. +//--------------------------------------------------------------------------- +ResourceDef *SubUnitInstantiation::BindLatPort(ResourceRef *arg) { + if (arg->IsNull()) + return NullPortDef; + if (auto *port_arg = FindItem(port_args(), arg->name())) + return port_arg; + + ErrorLog(arg, "Port argument not found: {0}", arg->name()); + return nullptr; +} + +//--------------------------------------------------------------------------- +// Bind a latency resource argument to its definition. +// Return the definition if found, if any errors are found return nullptr. +//--------------------------------------------------------------------------- +ResourceRef *SubUnitInstantiation::BindLatResource(ResourceRef *arg) { + // If this is a "null" binding, just return null. + if (arg->IsNull()) + return NullResourceRef; + + // Search for the resource definition in arguments an SU-level definitions. + if (auto *resource = FindItem(resource_args(), arg->name())) { + ResourceRef ref(*resource); + return func_unit()->MergeRefs(&ref, arg); + } + + ErrorLog(arg, "Resource argument not found: {0}", arg->name()); + return nullptr; +} + +//--------------------------------------------------------------------------- +// Bind latency instantiation parameters to ports and resources. +//--------------------------------------------------------------------------- +void SubUnitInstantiation::BindLatencyParams(LatencyInstantiation *lat) { + auto &instance_args = *lat->latency()->args(); + int num_params = instance_args.size(); + + for (int argid = 0; argid < num_params; argid++) { + if (instance_args[argid]->get_parameter()->IsResource()) { + lat->BindResourceArg(instance_args[argid]); + } else { + lat->BindPortArg(instance_args[argid]); + } + } +} + +//--------------------------------------------------------------------------- +// Bind latency reference resources to template parameters. +//--------------------------------------------------------------------------- +void SubUnitInstantiation::BindLatencyResources(LatencyInstantiation &lat, + Reference *reference, + ResourceRefList *resources) { + for (auto *res : *resources) { + ResourceRef *ref = nullptr; + if (auto *resource = FindItem(lat.resource_args(), res->name())) { + ref = func_unit()->MergeRefs(resource, res); + } else if (auto *port = FindItem(lat.port_args(), res->name())) { + reference->add_port(port); + if (auto *port_res = port->port_resource()) + ref = func_unit()->MergeRefs(port_res, res); + } else if (!res->IsNull()) { + ErrorLog(res, "Resource undefined: {0}", res->name()); + } + // If we have a valid resource reference, add it to the reference. + if (ref == nullptr) + continue; + reference->add_resource(ref); + + // Check for unqualified pool/group references. If it's a group + // reference, either use all the members or just one, depending on + // how the group was defined. If it's an array reference, print an + // error message. + if (ref->IsPooledResourceRef() && !res->use_all_members()) { + if (ref->IsGroupRef()) { + if (ref->definition()->group_type() == GroupType::kUseAll) + ref->set_use_all_members(); + else + ref->set_pool_count(1); // Set pool allocation for group to 1. + } else { // It's an array reference. 
+ ErrorLog(reference, + "Unqualified pool - use :* to reference whole pool: {0}", + ref->ToString()); + } + } + } +} + +//--------------------------------------------------------------------------- +// Recursively copy conditional references and all their references and +// else clauses. +//--------------------------------------------------------------------------- +ConditionalRef * +SubUnitInstantiation::CopyLatencyCondReference(LatencyInstantiation &lat, + ConditionalRef *cond) { + // Copy the else clause, if there is one. + if (cond == nullptr) + return nullptr; + auto *else_clause = CopyLatencyCondReference(lat, cond->else_clause()); + + // Make a copy of the conditional reference (and the copied else clause), + // and copy the references associated with this condition. + auto *copy = new ConditionalRef(cond, else_clause); + for (auto *ref : cond->refs()) + CopyLatencyReference(lat, copy->refs(), ref); + return copy; +} + +//--------------------------------------------------------------------------- +// When instantiating a latency, copy each reference, bind resources to +// instance parameters, and do some error checking. +//--------------------------------------------------------------------------- +void SubUnitInstantiation::CopyLatencyReference(LatencyInstantiation &lat, + ReferenceList &references, + Reference *ref) { + if (!ValidPredicate(ref->predicates())) + return; + + // Recursively copy conditional references. + if (ref->IsConditionalRef()) { + auto *cond = CopyLatencyCondReference(lat, ref->conditional_ref()); + references.push_back(new Reference(ref, cond)); + return; + } + + // If the reference doesn't have a phase expression, give it one. + auto *phase = ref->phase_expr(); + if (phase == nullptr) + phase = new PhaseExpr(spec_->FindFirstExecutePhase(cpu())); + else + phase = phase->clone(); + + // Copy the reference, and for each resource reference, bind the named + // resource to the value passed into the subunit instance resource or + // port parameter. + auto *new_reference = new Reference(ref, phase); + if (!ref->IsFuncUnitRef()) + BindLatencyResources(lat, new_reference, ref->resources()); + + // Currently we don't allow holds/reserves on pooled resources. + if (new_reference->ref_type() & (RefTypes::kHold | RefTypes::kReserve)) + for (auto *res : *new_reference->resources()) + if (res->HasCount()) + ErrorLog(ref, "Hold/reserve not supported on pool references: {0}", + res->ToString()); + + references.push_back(new_reference); +} + +//--------------------------------------------------------------------------- +// Add references from a latency template to a subunit. lat_template is +// passed in explicitly so that we can instantiate parents and bases. +//--------------------------------------------------------------------------- +void SubUnitInstantiation::InstantiateLatency(LatencyInstantiation &lat, + LatencyTemplate *lat_template) { + for (auto *reference : *lat_template->references()) + CopyLatencyReference(lat, references(), reference); +} + +//--------------------------------------------------------------------------- +// Instantiate a latency template and all of its bases, recursively. +//--------------------------------------------------------------------------- +void SubUnitInstantiation::InstantiateLatencyBases(LatencyInstantiation &lat, + LatencyTemplate *parent, + LatencyList &bases) { + // There's no need to instantiate a latency template in a particular + // subunit more than once (which is possible if you have multiple bases, + // or recursive bases). 
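+  // (For example, an illustrative "latency lat_c : lat_a : lat_b" whose two
+  // bases share a common ancestor would otherwise pick up that ancestor's
+  // references twice.)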
+ if (std::find(bases.begin(), bases.end(), parent) != bases.end()) + return; + bases.push_back(parent); + + InstantiateLatency(lat, parent); + if (ErrorsSeen()) + return; + + for (auto *base : parent->unit_bases()) { + InstantiateLatencyBases(lat, base, bases); + if (ErrorsSeen()) + return; + } +} + +//--------------------------------------------------------------------------- +// Instantiate all the latencies (and latency bases) associated with a +// subunit instantiation. +//--------------------------------------------------------------------------- +void SubUnitInstantiation::InstantiateLatencies() { + if (su_template()->latencies() == nullptr) + return; + + for (auto *instance : *su_template()->latencies()) + if (ValidPredicate(instance->predicates())) { + LatencyInstantiation latency(this, instance); + BindLatencyParams(&latency); + if (ErrorsSeen()) + return; + LatencyList bases; // used to avoid duplicates and recursion. + InstantiateLatencyBases(latency, instance->get_template(), bases); + if (ErrorsSeen()) + return; + } +} + +//--------------------------------------------------------------------------- +// Implementation of LatencyInstantiation methods. +//--------------------------------------------------------------------------- + +// Bind a port definition to the specified latency instantiation parameter. +void LatencyInstantiation::BindPortArg(ResourceRef *arg) { + port_args()[arg->get_parameter()->name()] = subunit()->BindLatPort(arg); +} + +// Bind a resource definition to the specified latency instantiation parameter. +void LatencyInstantiation::BindResourceArg(ResourceRef *arg) { + resource_args()[arg->get_parameter()->name()] = + subunit()->BindLatResource(arg); +} + +// Map a latency instantiation parameter id to its bound resource. +ResourceRef *LatencyInstantiation::GetResourceArg(int param_id) { + return resource_args()[(*lat_template()->params())[param_id]->name()]; +} + +// Map a latency instantiation parameter id to its bound port. +ResourceDef *LatencyInstantiation::GetPortArg(int param_id) { + return port_args()[(*lat_template()->params())[param_id]->name()]; +} + +//---------------------------------------------------------------------------- +// Implementations of MdlSpec methods. +//---------------------------------------------------------------------------- + +// Create and add a Functional Unit instantiation to the mdl spec table. +FuncUnitInstantiation * +MdlSpec::AddFuncUnitInstantiation(CpuInstance *cpu, ClusterInstance *cluster, + FuncUnitInstance *fu_inst) { + auto *fu = new FuncUnitInstantiation(this, cpu, cluster, fu_inst); + cluster->AddFuncUnitInstantiation(fu); + return fu; +} + +// Create a base function unit instance object and add to mdl spec table. +FuncUnitInstantiation * +MdlSpec::AddFuncUnitBaseInstantiation(FuncUnitInstantiation *parent, + FuncUnitTemplate *base) { + auto *fu = new FuncUnitInstantiation(parent, base); + parent->cluster()->AddFuncUnitInstantiation(fu); + return fu; +} + +// Recursively add base functional units to instantiated parents. 
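+// A functional unit template may list base templates (for example, the
+// illustrative "func_unit ALU : ALU_BASE(...) { ... }"); each base gets its
+// own instantiation in the same cluster as the derived unit.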
+void MdlSpec::AddFunctionalUnitBases(FuncUnitInstantiation *parent) { + auto *root = parent->get_template(); + for (auto *base : root->unit_bases()) { + if (base == root) { + WarningLog(base, "Recursive functional unit derivation, ignored"); + continue; + } + if (!base->unit_bases().empty()) + WarningLog(base, "Nested functional unit derivation, ignored"); + + auto *fu = AddFuncUnitBaseInstantiation(parent, base); + fu->set_resource(); + fu->ProcessConnects(); + fu->InstantiateSubunits(); + } +} + +// Instantiate a single functional unit, and all of its base units. +void MdlSpec::InstantiateFunctionalUnit(CpuInstance *cpu, + ClusterInstance *cluster, + FuncUnitInstance *fu) { + auto *fu_top = AddFuncUnitInstantiation(cpu, cluster, fu); + fu_top->set_resource(); + + // Bind parameters to their associated definitions, check for errors. + // If errors found, don't try to instantiate any subunits. + fu_top->BindFuncUnitParameters(); + if (ErrorsSeen()) + return; + + // After processing parameters, promote groups. + PromoteResourceGroupMembers(&fu_top->resources(), nullptr, + &fu_top->resource_args()); + + // Bind pinning resources. + fu_top->BindFuncUnitSlotResources(); + + // Process connect statements and instantiate subunits. + fu_top->ProcessConnects(); + fu_top->InstantiateSubunits(); + + // For each base unit, create a separate instantiation with the same + // parameters as the parent, and instantiate its subunits. + AddFunctionalUnitBases(fu_top); +} + +// Instantiate every functional unit instance (in every CPU and cluster). +// Simply abort if any errors are found. +void MdlSpec::InstantiateFunctionalUnits() { + for (auto *cpu : cpus()) + for (auto *cluster : *cpu->clusters()) + for (auto *fu_inst : *cluster->func_units()) + InstantiateFunctionalUnit(cpu, cluster, fu_inst); + + if (ErrorsSeen()) + Abort(); +} + +bool IsValidPoolCount(const ResourceRef *resource, const Reference *ref, + int count, const SubUnitInstantiation *subunit) { + if (count == 0) + return false; + if (count < 0) { + subunit->ErrorLog(ref, "Negative allocation size"); + return false; + } + if (count > resource->pool_size()) { + subunit->ErrorLog(ref, "Allocation size exceeds resource pool size: {0}", + resource->ToString()); + return false; + } + return true; +} + +// Update the subpool reference table for this reference. +void ResourceDef::AddReferenceSizeToPool(const ResourceRef *resource, + const Reference *ref, + const SubUnitInstantiation *subunit) { + SubPool pool(resource); + auto &pool_info = sub_pool(pool); + + // If the pool has a defined pool count, just use it. + if (resource->pool_count() != -1) { + if (IsValidPoolCount(resource, ref, resource->pool_count(), subunit)) { + pool_info.add_count(resource->pool_count()); + return; + } + } + + // If we have no idea what a symbolic size attribute is, just record the + // worst case number (the whole pool). + if (ref == nullptr || ref->operand() == nullptr) { + pool_info.add_count(resource->pool_size()); + return; + } + + // Find the whole derivation of the operand (if any). + auto *opnd_ref = ref->operand(); + auto *opnd_base = opnd_ref->operand_decl()->operand(); + auto *opnd_def = opnd_ref->operand(); + OperandDefList opnds; + if (!FindDerivation(opnd_def, opnd_base, opnds)) + return; // This is a panic, which will have already been seen and reported. + + // Make sure we find at least one occurrence of the named attribute. + // If it's not found, set a worst-case pool count. 
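+  // A symbolic count typically comes from a reference of the illustrative
+  // form "use(E1, ptrs:num_regs)", where "num_regs" names an attribute
+  // defined on one of the instruction's operand types.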
+ auto &count_name = resource->pool_count_name()->name(); + OperandAttribute *attr = nullptr; + for (auto *opnd : opnds) + if ((attr = FindAttribute(count_name, opnd, subunit)) != nullptr) + break; + if (attr == nullptr) { + pool_info.add_count(resource->pool_size()); + return; + } + + // Walk through all the operand derivations, and find values associated with + // the attribute, and add them to the resource pools definition. + for (auto *opnd : opnds) + for (auto *op_attr : *opnd->attributes()) + if (op_attr->name() == count_name) + if (subunit->ValidPredicate(op_attr->predicate())) + if (IsValidPoolCount(resource, ref, op_attr->values(0), subunit)) + pool_info.add_count(op_attr->values(0)); +} + +// Annotate a resource with the attributes of a reference to it. +void ResourceDef::RecordReference(RefType type, const PhaseExpr *expr, + const ResourceRef *resource, + const Reference *ref, + const SubUnitInstantiation *subunit) { + // For pools, record each pool size and the number of resources requested. + + if (resource != nullptr && resource->HasCount()) { + AddReferenceSizeToPool(resource, ref, subunit); + add_alloc_size(resource->pool_count()); + } + + ref_types_ |= type; + if (!expr->IsExpressionConstant()) { + phase_expr_seen_ = true; + return; + } + int phase = expr->EvaluateConstantExpression(); + int cycles = ref ? (ref->use_cycles() - 1) : 0; + if (earliest_ref_ == -1 || phase < earliest_ref_) + earliest_ref_ = phase; + if (latest_ref_ == -1 || phase + cycles > latest_ref_) + latest_ref_ = phase + cycles; +} + +// Add a nice debug name to a resource definition. +void ResourceDef::set_debug_name(std::string type, const CpuInstance *cpu, + const ClusterInstance *cluster, + const FuncUnitInstantiation *fu) { + std::string cpus = cpu ? formatv("{0}.", cpu->name()) : ""; + std::string cls = cluster ? formatv("{0}.", cluster->name()) : ""; + std::string fus = fu ? formatv("{0}.", fu->name()) : ""; + debug_name_ = formatv("{0}.{1}{2}{3}{4}", type, cpus, cls, fus, name()); +} + +// Assign a resource id to a resource definition. +// Note that we don't assign ids to groups or their members, they are subsumed +// by their promoted members. +static int AssignId(ResourceDef *def, int resource_id) { + def->set_resource_id(resource_id); + if (def->pool_size() > 0) + return resource_id + def->pool_size(); + return resource_id + 1; +} + +// Assign resource ids to each resource and port defined in the input. +// Note: We don't assign ids to global resources, since they are all copied +// into each CPU's resource set. +void MdlSpec::AssignResourceIds() { + for (auto *cpu : cpus()) { + // Add resources defined for each functional unit in each cluster. + // Make note of the last functional unit we see for each CPU. + ResourceDef *last_fu = nullptr; + for (auto *cluster : *cpu->clusters()) + for (auto *fu : cluster->fu_instantiations()) { + if (!fu->instance()->is_catchall_unit()) { + last_fu = fu->get_resource(); + cpu->add_cpu_resource(last_fu, "Funcunit", cpu, cluster, nullptr); + } + } + + // Add resources defined for issue slots. + for (auto *res : *cpu->issues()) + cpu->add_cpu_resource(res, "Issue", cpu, nullptr, nullptr); + + for (auto *cluster : *cpu->clusters()) + for (auto *res : *cluster->issues()) + cpu->add_cpu_resource(res, "Issue", cpu, cluster, nullptr); + + // Add all resources defined at the CPU level. + for (auto *res : *cpu->resources()) + cpu->add_cpu_resource(res, "Resource", cpu, nullptr, nullptr); + + // Add resources defined in clusters and function unit instantiations. 
+ // Note that we don't add ports to this list, since they don't need + // resource ids, and aren't first-class resources. + for (auto *cluster : *cpu->clusters()) { + for (auto *res : *cluster->resources()) + cpu->add_cpu_resource(res, "Resource", cpu, cluster, nullptr); + for (auto *fu : cluster->fu_instantiations()) + for (auto *resource : fu->resources()) + cpu->add_cpu_resource(resource, "Resource", cpu, cluster, fu); + } + + // Add one fake resource to mark the end of the list. + cpu->add_cpu_resource(new ResourceDef("end"), "fake", cpu, nullptr, + nullptr); + + // We've collected all the resources together, assign ids. + // We skip resource groups, since their members were promoted to + // regular resources. + int resource_id = 1; + for (auto *res : cpu->all_resources()) + if (!res->IsGroupDef()) + resource_id = AssignId(res, resource_id); + + // For each CPU, note the resource id of the last functional unit. + if (last_fu) + cpu->set_max_fu_id(last_fu->get_resource_id()); + } +} + +// Assign pool ids (per CPU) to each resource that defines a pool. +void MdlSpec::AssignPoolIds() { + for (auto *cpu : cpus()) { + int pool_id = 0; + for (auto *res : cpu->all_resources()) + if (res->IsPoolDef()) { + res->set_pool_id(pool_id); + cpu->add_pool_resource(res); + for (auto &[pool, pool_info] : res->sub_pools()) { + pool_info.set_subpool_id(pool_id); + pool_id += *pool_info.counts().rbegin(); + } + } + cpu->set_pool_count(pool_id); + } +} + +} // namespace mdl +} // namespace mpact diff --git a/llvm/utils/MdlCompiler/mdl.g4 b/llvm/utils/MdlCompiler/mdl.g4 new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/mdl.g4 @@ -0,0 +1,525 @@ +//===- mdl.g4 - Antlr4 grammar for the MDL language -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//--------------------------------------------------------------------------- +// Grammar for the MPACT Machine Description Language. +// +// This file is used by ANTLR4 to create a recursive descent parser for the +// specified input language, which produces a parse tree representing a +// parsed input file. See README for more information. +// +// IF YOU CHANGE THIS FILE, YOU SHOULD ASSUME THAT YOU WILL HAVE TO MAKE +// CHANGES IN DOWNSTREAM CLIENTS (in particular, mdl_visitor.*) THAT REFLECT +// THE STRUCTURE AND TYPE OF THE PARSE TREE. +// +// For more infomation about ANTLR4 grammars, see: +// github.com/antlr/antlr4/blob/master/doc/index.md +// For a good tutorial of how to use ANTLR4, see "The ANTLR Mega Tutorial" +// tomassetti.me/antlr-mega-tutorial +//--------------------------------------------------------------------------- + +grammar mdl; + +//--------------------------------------------------------------------------- +// Top level production for entire file. 
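+//
+// An input description is a sequence of top-level items, for example
+// (all names below are purely illustrative):
+//     family my_family;
+//     cpu my_cpu { ... }
+//     func_unit alu_type(...) { ... }
+//     subunit su_add(...) { ... }
+//     latency lat_add(...) { ... }
+//     instruction ADD(...) { subunit(su_add); }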
+//--------------------------------------------------------------------------- +architecture_spec : architecture_item+ EOF + ; +architecture_item : family_name + | cpu_def + | register_def + | register_class + | resource_def + | pipe_def + | func_unit_template + | func_unit_group + | subunit_template + | latency_template + | instruction_def + | operand_def + | derived_operand_def + | import_file + | predicate_def + ; + +//--------------------------------------------------------------------------- +// We support import files at the top-level in the grammar. These will be +// handled by the visitor, so the containing file is completely parsed +// before handling any imported files. +//--------------------------------------------------------------------------- +import_file : IMPORT STRING_LITERAL + ; + +//--------------------------------------------------------------------------- +// Define a processor family name, used to interact with target compiler. +//--------------------------------------------------------------------------- +family_name : FAMILY ident ';' + ; + +//--------------------------------------------------------------------------- +// Top-level CPU instantiation. +//--------------------------------------------------------------------------- +cpu_def : (CPU | CORE) ident + ('(' STRING_LITERAL (',' STRING_LITERAL)* ')')? + '{' cpu_stmt* '}' ';'? + ; +cpu_stmt : pipe_def + | resource_def + | reorder_buffer_def + | issue_statement + | cluster_instantiation + | func_unit_instantiation + | forward_stmt + ; + +reorder_buffer_def : REORDER_BUFFER '<' size=number '>' ';' + ; + +//--------------------------------------------------------------------------- +// Cluster specification. +//--------------------------------------------------------------------------- +cluster_instantiation : CLUSTER cluster_name=ident '{' cluster_stmt+ '}' ';'? + ; +cluster_stmt : resource_def + | issue_statement + | func_unit_instantiation + | forward_stmt + ; +issue_statement : ISSUE '(' start=ident ('..' end=ident)? ')' + name_list ';' + ; + +//--------------------------------------------------------------------------- +// Functional unit instantiation (in CPUs and Clusters). +//--------------------------------------------------------------------------- +func_unit_instantiation : FUNCUNIT type=func_unit_instance + bases=func_unit_bases* + name=ident '(' resource_refs? ')' + ('->' (one=pin_one | any=pin_any | all=pin_all))? ';' + ; +pin_one : ident // avoid ambiguity with any/all. + ; +pin_any : ident ('|' ident)+ + ; +pin_all : ident ('&' ident)+ + ; +func_unit_instance : ident (unreserved='<>' | ('<' buffered=number '>'))? + ; +func_unit_bases : ':' func_unit_instance + ; + +//--------------------------------------------------------------------------- +// A single forwarding specification (in CPUs and Clusters). +//--------------------------------------------------------------------------- +forward_stmt : FORWARD from_unit=ident '->' + forward_to_unit (',' forward_to_unit)* ';' + ; +forward_to_unit : ident ('(' cycles=snumber ')')? + ; + +//--------------------------------------------------------------------------- +// Functional unit template definition. +//--------------------------------------------------------------------------- +func_unit_template : FUNCUNIT type=ident base=base_list + '(' func_unit_params? ')' + '{' func_unit_template_stmt* '}' ';'? 
+ ; +func_unit_params : fu_decl_item (';' fu_decl_item)* + ; +fu_decl_item : RESOURCE name_list + | CLASS name_list + ; +func_unit_template_stmt : resource_def + | port_def + | connect_stmt + | subunit_instantiation + ; +port_def : PORT port_decl (',' port_decl)* ';' + ; +port_decl : name=ident ('<' reg_class=ident '>')? + ('(' ref=resource_ref ')')? + ; + +connect_stmt : CONNECT port=ident + (TO reg_class=ident)? (VIA resource_ref)? ';' + ; + + +//--------------------------------------------------------------------------- +// Functional unit group definition. +//--------------------------------------------------------------------------- +func_unit_group : FUNCGROUP name=ident ('<' buffered=number '>')? + ':' members=name_list ';' + ; + +//--------------------------------------------------------------------------- +// Other FU statements, we may not need these. +//--------------------------------------------------------------------------- +// local_connect TO . + +//--------------------------------------------------------------------------- +// Definition of subunit template instantiation. +//--------------------------------------------------------------------------- +subunit_instantiation : (predicate=name_list ':')? subunit_statement + | predicate=name_list ':' + '{' subunit_statement* '}' ';'? + ; + +subunit_statement : SUBUNIT subunit_instance (',' subunit_instance)* ';' + ; + +subunit_instance : ident '(' resource_refs? ')' + ; + +//--------------------------------------------------------------------------- +// Definition of subunit template definition. +//--------------------------------------------------------------------------- +subunit_template : SUBUNIT name=ident base=su_base_list + '(' su_decl_items? ')' + (('{' body = subunit_body* '}' ';'?) | + ('{{' latency_items* '}}' ';'? )) + ; +su_decl_items : su_decl_item (';' su_decl_item)* + ; +su_decl_item : RESOURCE name_list + | PORT name_list + ; +su_base_list : (':' (unit=ident | regex=STRING_LITERAL))* + ; + +//--------------------------------------------------------------------------- +// Subunit template statements. +//--------------------------------------------------------------------------- +subunit_body : latency_instance + ; +latency_instance : (predicate=name_list ':')? latency_statement + | predicate=name_list ':' + '{' latency_statement* '}' ';'? + ; +latency_statement : LATENCY ident '(' resource_refs? ')' ';' + ; + +//--------------------------------------------------------------------------- +// Latency template definition. +//--------------------------------------------------------------------------- +latency_template : LATENCY name=ident base=base_list + '(' su_decl_items? ')' + '{' latency_items* '}' ';'? + ; +latency_items : (predicate=name_list ':')? + (latency_item | ('{' latency_item* '}' ';'?)) + ; +latency_item : latency_ref + | conditional_ref + | fus_statement + ; + +//--------------------------------------------------------------------------- +// Conditional references +//--------------------------------------------------------------------------- +conditional_ref : 'if' ident '{' latency_item* '}' + (conditional_elseif | conditional_else)? + ; +conditional_elseif : 'else' 'if' ident '{' latency_item* '}' + (conditional_elseif | conditional_else)? 
+ ; +conditional_else : 'else' '{' latency_item* '}' + ; + +//--------------------------------------------------------------------------- +// Basic references +//--------------------------------------------------------------------------- +latency_ref : ref_type '(' latency_spec ')' ';' + ; +ref_type : (USE | DEF | USEDEF | KILL | HOLD | RES | PREDICATE) + ; +latency_spec : expr (':' cycles=number)? ',' latency_resource_refs + | expr ('[' repeat=number (',' delay=number)? ']')? + ',' operand + | expr ',' operand ',' latency_resource_refs + ; +expr : '-' negate=expr + | left=expr mop=('*' | '/') right=expr + | left=expr aop=('+' | '-') right=expr + | '{' posexpr=expr '}' + | '(' subexpr=expr ')' + | phase_name=ident + | num=number + | opnd=operand + ; + +//--------------------------------------------------------------------------- +// Shorthand for a reference that uses functional units. +//--------------------------------------------------------------------------- +fus_statement : FUS '(' (fus_item ('&' fus_item)* ',')? + micro_ops=snumber (',' fus_attribute)* ')' ';' + ; +fus_item : name=ident ('<' (expr ':')? number '>')? + ; +fus_attribute : BEGINGROUP | ENDGROUP | SINGLEISSUE | RETIREOOO + ; + +//--------------------------------------------------------------------------- +// Latency resource references allow resource allocation and value masking. +// Member references and index referencing don't allow allocation, but do +// allow masking. This is checked semantically in the visitor, not here. +//--------------------------------------------------------------------------- +latency_resource_refs : latency_resource_ref (',' latency_resource_ref)* + ; +latency_resource_ref : resource_ref ':' count=number (':' value=ident)? + | resource_ref ':' countname=ident (':' value=ident)? + | resource_ref ':' ':' value=ident // no allocation + | resource_ref ':' all='*' + | resource_ref + ; +operand : (type=ident ':')? '$' opnd=ident ('.' operand_ref)* + | (type=ident ':')? '$' opnd_id=number + | (type=ident ':')? '$$' var_opnd_id=number + ; +operand_ref : ident | number + ; + +//--------------------------------------------------------------------------- +// Pipeline phase names definitions. +//--------------------------------------------------------------------------- +pipe_def : protection? PIPE_PHASES ident '{' pipe_phases '}' ';'? + ; +protection : PROTECTED | UNPROTECTED | HARD + ; +pipe_phases : phase_id (',' phase_id)* + ; +phase_id : (first_exe='#')? ident ('[' range ']')? ('=' number)? + ; + +//--------------------------------------------------------------------------- +// Resource definitions: global in scope, CPU- or Cluster- or FU-level. +//--------------------------------------------------------------------------- +resource_def : RESOURCE ( '(' start=ident ('..' end=ident)? ')' )? + resource_decl (',' resource_decl)* ';' + ; +resource_decl : name=ident (':' bits=number)? ('[' count=number ']')? + | name=ident (':' bits=number)? '{' name_list '}' + | name=ident (':' bits=number)? '{' group_list '}' + ; +resource_refs : resource_ref (',' resource_ref)* + ; +resource_ref : name=ident ('[' range ']')? + | name=ident '.' member=ident + | name=ident '[' index=number ']' + | group_or=ident ('|' ident)+ + | group_and=ident ('&' ident)+ + ; + +//--------------------------------------------------------------------------- +// List of identifiers. 
+//--------------------------------------------------------------------------- +name_list : ident (',' ident)* + ; +group_list : group_or=ident ('|' ident)+ + | group_and=ident ('&' ident)+ + ; +//--------------------------------------------------------------------------- +// List of template bases +//--------------------------------------------------------------------------- +base_list : (':' ident)* + ; + +//--------------------------------------------------------------------------- +// Register definitions. +//--------------------------------------------------------------------------- +register_def : REGISTER register_decl (',' register_decl)* ';' + ; +register_decl : name=ident ('[' range ']')? + ; +register_class : REGCLASS ident + '{' register_decl (',' register_decl)* '}' ';'? + | REGCLASS ident '{' '}' ';'? + ; + +//--------------------------------------------------------------------------- +// Instruction definition. +//--------------------------------------------------------------------------- +instruction_def : INSTRUCTION name=ident + '(' (operand_decl (',' operand_decl)*)? ')' + '{' + (SUBUNIT '(' subunit=name_list ')' ';' )? + (DERIVED '(' derived=name_list ')' ';' )? + '}' ';'? + ; + +//--------------------------------------------------------------------------- +// Operand definition. +//--------------------------------------------------------------------------- +operand_def : OPERAND name=ident + '(' (operand_decl (',' operand_decl)*)? ')' + '{' (operand_type | operand_attribute)* '}' ';'? + ; +operand_decl : ((type=ident (name=ident)?) | ellipsis='...') + (input='(I)' | output='(O)')? + ; + +operand_type : TYPE '(' type=ident ')' ';' + ; +operand_attribute : (predicate=name_list ':')? operand_attribute_stmt + | predicate=name_list ':' + '{' operand_attribute_stmt* '}' ';'? + ; +operand_attribute_stmt : ATTRIBUTE name=ident '=' + (value=snumber | values=tuple) + (IF type=ident + ('[' pred_value (',' pred_value)* ']' )? )? + ';' + ; +pred_value : value=snumber + | low=snumber '..' high=snumber + | '{' mask=number '}' + ; + +//--------------------------------------------------------------------------- +// Derived Operand definition. +//--------------------------------------------------------------------------- +derived_operand_def : OPERAND name=ident base_list ('(' ')')? + '{' (operand_type | operand_attribute)* '}' ';'? + ; + +//--------------------------------------------------------------------------- +// Predicate definition. +//--------------------------------------------------------------------------- +predicate_def : PREDICATE ident ':' predicate_op? ';' + ; + +predicate_op : pred_opcode '<' pred_opnd (',' pred_opnd)* ','? '>' + | code=code_escape + | ident + ; +code_escape : '[' '{' .*? 
'}' ']' + ; + +pred_opnd : name=ident + | snumber + | string=STRING_LITERAL + | '[' opcode_list=ident (',' ident)* ']' + | pred=predicate_op + | operand + ; + +pred_opcode : 'CheckAny' | 'CheckAll' | 'CheckNot' + | 'CheckOpcode' + | 'CheckIsRegOperand' | 'CheckRegOperand' + | 'CheckSameRegOperand' | 'CheckNumOperands' + | 'CheckIsImmOperand' | 'CheckImmOperand' + | 'CheckZeroOperand' | 'CheckInvalidRegOperand' + | 'CheckFunctionPredicate' + | 'CheckFunctionPredicateWithTII' + | 'TIIPredicate' + | 'OpcodeSwitchStatement' | 'OpcodeSwitchCase' + | 'ReturnStatement' + | 'MCSchedPredicate' + ; + +//--------------------------------------------------------------------------- +// ANTLR hack to allow some identifiers to override some keywords in some +// circumstances (wherever "ident" is used). For the most part, we just +// allow overriding "short" keywords for resources, registers, operands, +// pipeline names, and ports. +//--------------------------------------------------------------------------- +ident : 'use' | 'def' | 'kill' | 'usedef' | 'hold' | 'res' + | 'port' | 'to' | 'via' | 'core' | 'cpu' | 'issue' + | 'class' | 'type' | 'hard' | 'if' | 'family' + | 'fus' | 'BeginGroup' | 'EndGroup' | 'SingleIssue' + | 'RetireOOO' | 'register' | IDENT + ; + +//--------------------------------------------------------------------------- +// Match and convert a number. +//--------------------------------------------------------------------------- +number returns [int64_t value] + : NUMBER { $value = std::stoul($NUMBER.text, 0, 0); } + ; +snumber returns [int64_t value] + : NUMBER { $value = std::stoul($NUMBER.text, 0, 0); } + | '-' NUMBER + { $value = -std::stoul($NUMBER.text, 0, 0); } + ; + +//--------------------------------------------------------------------------- +// Match a set of numbers. +//--------------------------------------------------------------------------- +tuple : '[' snumber (',' snumber)* ']' + ; + +//--------------------------------------------------------------------------- +// A constrained range - both must be non-negative numbers. +//--------------------------------------------------------------------------- +range : first=number '..' last=number + ; + +//--------------------------------------------------------------------------- +// Token definitions. 
+//---------------------------------------------------------------------------
+FAMILY : 'family';
+CPU : 'cpu';
+CORE : 'core';
+CLUSTER : 'cluster';
+REORDER_BUFFER : 'reorder_buffer';
+ISSUE : 'issue';
+FUNCUNIT : 'func_unit';
+FORWARD : 'forward';
+FUNCGROUP : 'func_group';
+CONNECT : 'connect';
+SUBUNIT : 'subunit';
+FUS : 'fus';
+BEGINGROUP : 'BeginGroup';
+ENDGROUP : 'EndGroup';
+SINGLEISSUE : 'SingleIssue';
+RETIREOOO : 'RetireOOO';
+MICROOPS : 'micro_ops';
+DERIVED : 'derived';
+LATENCY : 'latency';
+PIPE_PHASES : 'phases';
+PROTECTED : 'protected';
+UNPROTECTED : 'unprotected';
+HARD : 'hard';
+RESOURCE : 'resource';
+PORT : 'port';
+TO : 'to';
+VIA : 'via';
+REGISTER : 'register';
+REGCLASS : 'register_class';
+CLASS : 'class';
+IMPORT : 'import';
+INSTRUCTION : 'instruction';
+OPERAND : 'operand';
+TYPE : 'type';
+ATTRIBUTE : 'attribute';
+IF : 'if';
+USE : 'use';
+DEF : 'def';
+USEDEF : 'usedef';
+KILL : 'kill';
+HOLD : 'hold';
+RES : 'res';
+PREDICATE : 'predicate';
+
+IDENT : [_a-zA-Z][_a-zA-Z0-9]*;
+
+NUMBER : HEX_NUMBER | OCT_NUMBER | BIN_NUMBER | DEC_NUMBER;
+DEC_NUMBER : '0' | [1-9][0-9]*;
+HEX_NUMBER : '0x' HEX_DIGIT (HEX_DIGIT | '\'')*;
+HEX_DIGIT : [0-9a-fA-F];
+OCT_NUMBER : '0' OCT_DIGIT (OCT_DIGIT | '\'')*;
+OCT_DIGIT : [0-7];
+BIN_NUMBER : '0b' [0-1] ([0-1] | '\'')*;
+
+STRING_LITERAL : UNTERMINATED_STRING_LITERAL '"';
+UNTERMINATED_STRING_LITERAL : '"' (~["\\\r\n] | '\\' (. | EOF))*;
+
+BLOCK_COMMENT : '/*' .*? '*/' -> channel(HIDDEN);
+LINE_COMMENT : '//' .*?[\n\r] -> channel(HIDDEN);
+WS : [ \t\r\n] -> channel(HIDDEN);
+
diff --git a/llvm/utils/MdlCompiler/mdl_dump.cpp b/llvm/utils/MdlCompiler/mdl_dump.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/utils/MdlCompiler/mdl_dump.cpp
@@ -0,0 +1,665 @@
+//===- mdl_dump.cpp - Dump out internal MDL objects ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MDL method implementations that dump the internal representation of the
+// input machine descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <string>
+
+#include "mdl.h"
+
+namespace mpact {
+namespace mdl {
+
+//----------------------------------------------------------------------------
+// Definitions of functions that format, for printing, the objects that
+// represent the input machine description.
+//----------------------------------------------------------------------------
+// Stringify a reference to an Identifier.
+std::string Identifier::ToString() const { return name(); }
+
+// Stringify a phase name definition.
+std::string PhaseName::ToString() const { return name(); }
+
+// Stringify a register definition.
+std::string RegisterDef::ToString() const { return name(); }
+
+// Stringify a register class definition.
+std::string RegisterClass::ToString() const {
+  std::string hdr = formatv("\nRegister class: {0}", name());
+  std::string members = StringVec(members_, " { ", ", ", " }");
+  return hdr + members;
+}
+
+// Stringify a reference to a register class.
+std::string RegisterClassRef::ToString() const { return id_->ToString(); }
+
+// Stringify a pipe phase set definition.
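+// For an illustrative declaration "protected phases exe { E1, E2, E3 };",
+// this produces a line of the form "Pipe Phases exe(protected):E1, E2, E3".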
+std::string PipePhases::ToString() const { + std::string type; + if (is_hard_) + type = "(hard):"; + else if (is_protected_) + type = "(protected):"; + else + type = "(unprotected):"; + return formatv("Pipe Phases {0}{1}", id_->ToString(), + StringVec(phase_names_, type, ", ")); +} + +// Stringify a resource definition. +std::string ResourceDef::ToString() const { + std::string out = formatv("Resource {0}", id_->ToString()); + if (get_resource_id() > 0) + out += formatv("{{{0}}", get_resource_id()); + if (start_phase_) + out += formatv("({0}", start_phase_->ToString()); + if (end_phase_) + out += formatv("..{0}", end_phase_->ToString()); + if (start_phase_) + out += ")"; + if (bit_size_ > 0) + out += formatv(":{0}", bit_size_); + if (pool_size_ > 0) + out += formatv("[{0}]", pool_size_); + if (!members_.empty()) { + auto sep = group_type() == GroupType::kUseAll ? " & " : " , "; + out += StringVec(&members_, " {", sep, "}"); + } + + if (reg_class_) + out += formatv(" <{0}>", reg_class()->id()->ToString()); + if (port_resource() != nullptr) + out += formatv("<{0}>", port_resource()->ToString()); + return out; +} + +// Format a string that summarizes the refs we've seen to this resource. +std::string ResourceDef::ref_summary() const { + if (earliest_ref_ == -1 && latest_ref_ == -1 && !phase_expr_seen_) + return ""; + return formatv(",\tcycles: [{0}..{1}]{2}{3}", earliest_ref_, latest_ref_, + phase_expr_seen_ ? " (expr) " : " ", + FormatReferenceTypes(ref_types_)); +} + +// Stringify a reference to a resource. +std::string ResourceRef::ToString() const { + std::string out = id_->ToString(); + + if (IsGroupRef()) { + std::string ids; + auto sep = definition_->group_type() == GroupType::kUseAll ? "&" : ","; + for (auto *member : definition_->member_defs()) { + if (!ids.empty()) + ids += sep; + ids += std::to_string(member->get_resource_id()); + } + out += "{" + ids + "}"; + } + + if (get_resource_id() > 0) + out += formatv("{{{0}}", get_resource_id()); + + if (member_) + out += formatv(".{0}{{{1}}", member_->ToString(), member_id()); + if (first_ != -1 && last_ != -1) { + if (last_ != first_) + out += formatv("[{0}..{1}]", first_, last_); + else + out += formatv("[{0}]", first_); + } + if (pool_count_ != -1) + out += formatv(":{0}", pool_count_); + if (pool_count_name_) + out += formatv(":{0}", pool_count_name_->ToString()); + if (value_name_) + out += formatv(":{0}", value_name_->ToString()); + if (operand_index_ != -1) + out += formatv("-->{0}", operand_index_); + return out; +} + +static const char *divider = + "===========================================================\n"; + +// Stringify a CPU instance. +std::string CpuInstance::ToString() const { + std::string out = + formatv("{0}Cpu Definition:{1}\n\n", divider, id_->ToString()); + + return out + formatv("{0}{1}{2}{3}{4}", StringVec(pipe_phases_), + StringVec(issues_, "Issue Slots:", ", "), + StringVec(resources_), + StringVec(clusters_), + StringVec(forward_stmts_)); +} + +// Stringify a cluster instance. +std::string ClusterInstance::ToString() const { + std::string out = formatv("{0}Cluster: {1}\n\n", divider, id_->ToString()); + + return out + formatv("{0}{1}{2}{3}", + StringVec(issues_, "Issue Slots:", ", "), + StringVec(resources_), + StringVec(func_units_), + StringVec(forward_stmts_)); +} + +// Stringify a single forwarding statement. 
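+// Produces output of the form "Forward: alu -> mul(1)div(2)", where the
+// unit names and forwarding cycles here are purely illustrative.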
+std::string ForwardStmt::ToString() const { + std::string out = formatv("Forward: {0} -> ", from_unit_->name()); + for (auto [unit, cycles] : to_units_) + out += formatv("{0}({1})", unit->name(), cycles); + return out + "\n"; +} + +// Stringify a functional unit instance. +std::string FuncUnitInstance::ToString() const { + std::string out = formatv("Func Unit: {0}", type_->ToString()); + if (id_) + out += formatv(" {0}", id_->ToString()); + out += StringVec(args_, "(", ", ", ")"); + if (pin_any_) + out += StringVec(pin_any_, " -> ", " | ", ""); + if (pin_all_) + out += StringVec(pin_all_, " -> ", " & ", ""); + return out; +} + +// Stringify a subunit instance. +std::string SubUnitInstance::ToString() const { + std::string out = StringVec(predicates_, "[", ",", "] : "); + out += formatv("Subunit: {0}", id_->ToString()); + if (args_ == nullptr) + return out + "()\n"; + return out + StringVec(args_, "(", ", ", ")\n"); +} + +// Stringify a latency instance. +std::string LatencyInstance::ToString() const { + std::string out = StringVec(predicates_, "[", ",", "] : "); + out += formatv("Latency {0}", id_->ToString()); + if (args_ == nullptr) + return out + "()\n"; + return out + StringVec(args_, "(", ", ", ")\n"); +} + +// Stringify a parameter of a functional unit, subunit, or latency template. +std::string Params::ToString() const { + const char *kinds[] = {"p", "c", "r"}; + return formatv("{0}:{1}", kinds[static_cast(type_)], id_->ToString()); +} + +// Stringify a functional unit template definition. +std::string FuncUnitTemplate::ToString() const { + std::string out = + formatv("{0}Func Unit Template: {1}\n\n", divider, id_->ToString()); + + if (bases_ && !bases_->empty()) + out += + StringVec(bases_, "Base Functional Unit: ", ", ", "\n"); + + out += StringVec(params_, "Template Parameters(", ", ", ")\n\n"); + if (ports_ && !ports_->empty()) + out += StringVec(ports_, "Ports: ", ", ", "\n"); + out += StringVec(resources_); + out += StringVec(connections_, "", "", ""); + out += StringVec(subunits_, "", "", ""); + return out; +} + +// Stringify a connect statement in a functional unit template. +std::string Connect::ToString() const { + std::string out = formatv("Connect {0}", id_->ToString()); + if (reg_class_) + out += formatv(" to {0}", reg_class_->ToString()); + if (resource_) + out += formatv(" via {0}", resource_->ToString()); + return out + "\n"; +} + +// Stringify a subunit template definition. +std::string SubUnitTemplate::ToString() const { + std::string out = + formatv("{0}Sub Unit Template: {1}\n\n", divider, type_->ToString()); + + if (bases_ && !bases_->empty()) + out += StringVec(bases_, "Base Subunits: ", ", ", "\n"); + + out += StringVec(params_, "Template Parameters(", ", ", ")\n") + + StringVec(latencies_); + out += "\n"; + return out; +} + +// Stringify a latency template definition. +std::string LatencyTemplate::ToString() const { + return formatv( + "{0}Latency Template: {1}\n\n{2}{3}{4}{5}", divider, id_->ToString(), + StringVec(base_ids_, "Bases: ", ", ", "\n"), + StringVec(params_, "Template Parameters(", ", ", ")\n"), + StringVec(references_, " ", "\n "), + StringSet(referenced_fus_, "FUs:", ", ", "\n")); +} + +// Find an appropriate name for a operand reference type. 
+std::string OperandRef::type_name() const { + if (operand_decl_) + return operand_decl_->type_name(); + if (operand_) + return operand_->name(); + if (reg_class_) + return reg_class_->name(); + if (op_type_) + return op_type_->ToString(); + return ""; +} + +// Stringify a single operand descriptor in a latency reference object. +std::string OperandRef::ToString() const { + std::string out = type_name(); + if (!out.empty()) + out += ":"; + out += "$" + StringVec(op_names_, "", ".", ""); + if (operand_index_ != -1) + out += formatv("[{0}]", operand_index_); + return out; +} + +// Stringify a latency expression for debug output. +std::string PhaseExpr::ToString() const { + std::string left = left_ ? left_->ToString() : ""; + std::string right = right_ ? right_->ToString() : ""; + + switch (operation_) { + case kPlus: + return formatv("({0}+{1})", left, right); + case kMinus: + return formatv("({0}-{1})", left, right); + case kMult: + return formatv("({0}*{1})", left, right); + case kDiv: + return formatv("({0}/{1})", left, right); + case kNeg: + return formatv("(-{0})", left); + case kPositive: + return formatv("{{{0}}", left); + case kOpnd: + return operand_->ToString(); + case kInt: + return formatv("{0}", number_); + case kPhase: + if (phase_name_) + return phase_name_->ToString(); + if (phase_) + return phase_->ToString(); + } + return "Unknown"; +} + +// Create a string that briefly represents the protection type of a phase. +std::string PhaseName::FormatProtection() const { + if (is_hard_) + return ".h"; + if (is_protected_) + return ".p"; + return ".u"; +} + +// Stringify a reference argument in a latency template. +std::string Reference::ToString() const { + std::string out; + if (IsConditionalRef()) { + out = conditional_ref()->ToString(false); + } else { + out = RefTypeToString(ref_type()); + + if (ref_type() == RefTypes::kFus) { + out += "("; + ResourceRef *res = nullptr; + if (!resources_->empty()) { + res = (*resources_)[0]; + out += res->name(); + } + if (phase_expr_ != nullptr) + out += formatv("<{0}:{1}> ", phase_expr_->ToString(), use_cycles()); + else + out += formatv("<{0}> ", use_cycles()); + + if (micro_ops() != 0) + out += formatv("Mops={0} ", micro_ops()); + // if (res && res->definition()->is_unreserved()) out += "unreserved "; + // if (res && res->definition()->is_buffered()) out += "buffered "; + if (RefFlags::is_begin_group(fu_flags())) + out += "begin-group "; + if (RefFlags::is_end_group(fu_flags())) + out += "end-group "; + if (RefFlags::is_single_issue(fu_flags())) + out += "single-issue "; + if (RefFlags::is_retire_ooo(fu_flags())) + out += "retire-ooo "; + return out + ")"; + } + out += formatv("{0}({1}", phase_expr_->FormatProtection(), + phase_expr_->ToString()); + if (use_cycles() != 1) + out += formatv(":{0}", use_cycles()); + if (repeat() != 1) + out += formatv("[{0},{1}]", repeat(), delay()); + if (operand_) + out += formatv(", {0}", operand_->ToString()); + if (ref_type() != RefTypes::kFus && !resources_->empty()) + out += StringVec(resources_, ", <", ", ", ">"); + if (port_ && port_->reg_class()) + out += + formatv(", port {0}<{1}>", port_->name(), port_->reg_class()->name()); + out += ")"; + } + if (predicates_) + out += StringVec(predicates_, " {", ",", "}"); + return out; +} + +// Stringify an if/then/else reference. +std::string ConditionalRef::ToString(bool is_else) { + std::string pred = predicate_ ? " if " + predicate_->name() : " "; + std::string out = (is_else ? 
" else " : "") + pred + "\n"; + for (auto *ref : refs()) + out += " " + ref->ToString() + "\n"; + if (else_clause()) + out += else_clause()->ToString(true); + if (out.back() == '\n') + out.pop_back(); + return out + "\n"; +} + +// Stringify a single operand declaration for an instruction or operand. +std::string OperandDecl::ToString() const { + if (print_fully_qualified_declaration_ && types_->size() > 1) + return StringVec(types_, "(", ".", ") ") + + StringVec(names_, "(", ".", ")"); + return formatv("{0} {1}", types_->back()->name(), names_->back()->name()); +} + +// Stringify a single operand definition. +std::string OperandDef::ToString() const { + std::string out = formatv("Operand: {0}", name()); + std::string type = type_ ? formatv("type({0});", type_->ToString()) : ""; + + // For LLVM operands, write out a short form of the operand. + if (operands_ && type_ && !bases_) + return out + StringVec(operands_, "(", ", ", ")") + + formatv(" {{ {0} }\n", type); + if (bases_) + out += StringVec(bases_, ": ", ": ", ""); + out += " {\n"; + if (type_) + out += formatv(" {0}\n", type); + return out + StringVec(attributes_) + "}\n"; +} + +// Stringify an operand attribute. +std::string OperandAttribute::ToString() const { + std::string out = formatv(" attribute {0} = ", name_->ToString()); + if (values()->size() == 1) { + out += formatv("{0}", values(0)); + } else { + out += "["; + for (auto value : *values()) + out += formatv("{0},", value); + out += "]"; + } + if (!predicate_values_->empty()) + out += "\n "; + if (!type().empty()) + out += formatv(" if {0}", type()); + if (!predicate_values_->empty()) + out += StringVec(predicate_values_, " [", ", ", "]"); + if (predicate_) + out += StringVec(predicate_, " {", ",", "}"); + return out; +} + +// Format an operand predicate value. Mostly we want to avoid printing +// out long decimal numbers. +std::string PredValue::FormatValue(int64_t value) const { + constexpr int kMinValue = 0; + constexpr int kMaxValue = 9999; + if (value >= kMinValue && value <= kMaxValue) + return std::to_string(value); + else + return formatv("{0:X8}UL", static_cast(value)); +} + +// Stringify an operand attribute predicate value. +std::string PredValue::ToString() const { + if (IsValue()) + return FormatValue(value()); + if (IsRange()) + return formatv("{0}..{1}", FormatValue(low()), FormatValue(high())); + if (IsMask()) + return formatv("{{ {0:X8}UL }", mask()); + return "empty"; +} + +// Stringify a single instruction definition. +std::string InstructionDef::ToString() const { + return formatv( + "Instruction: {0}{1}{2}{3}{4}", name(), + StringVec(operands_, "(", ", ", ")\n"), + StringVec(flat_operands_, "\t\tflat(", ", ", ")\n"), + StringVec(subunits_, "\t\t{ subunit(", ",", "); }\n"), + StringVec(derived_, "\t\t{ derived(", ",", "); }\n")); +} + +// Stringify all the instruction definitions. +// We organize the list by subunit, so that instructions sharing a subunit are +// dumped next to each other. The purpose of this is to help the user write +// and debug the machine description for similar instructions. +std::string DumpInstructionDefs(const InstructionList &instructions) { + std::string out; + + // build a map of instruction lists indexed by the first subunit name. 
+  std::map<std::string, InstructionList> instruction_map;
+  for (auto *instruct : instructions)
+    if (!instruct->subunits()->empty())
+      instruction_map[(*instruct->subunits())[0]->name()].push_back(instruct);
+
+  for (auto &entries : instruction_map) {
+    auto [subunit_name, instructions] = entries;
+    for (auto *instruction : instructions)
+      out += instruction->ToString();
+  }
+
+  return out;
+}
+
+// Stringify the entire machine description.
+std::string MdlSpec::ToString() const {
+  return formatv("{0}Machine Description\n\n", divider) +
+         StringVec(&pipe_phases_) +
+         StringVec(&resources_) +
+         StringVec(&registers_, "", ", ", "\n") +
+         StringVec(&reg_classes_) +
+         StringVec(&cpus_) +
+         StringVec(&func_units_) +
+         StringVec(&subunits_) +
+         StringVec(&latencies_) +
+         StringVec(&operands_) +
+         DumpInstructionDefs(instructions_);
+}
+
+// Print details of a single functional unit instantiation.
+void FuncUnitInstantiation::DumpFuncUnitInstantiation() {
+  auto out = formatv("{0}: {{{1}} {2} {3}(", cpu()->name(), cluster()->name(),
+                     func_type()->name(), instance()->name());
+
+  if (ResourceRefList *args = instance()->args()) {
+    int params = std::min(func_type()->params()->size(), args->size());
+    for (int argid = 0; argid < params; argid++) {
+      if ((*func_type()->params())[argid]->IsResource())
+        out += GetResourceArg(argid)->ToString();
+      else
+        out += GetClassArg(argid)->ToString();
+      if (argid < params - 1)
+        out += ", ";
+    }
+  }
+  std::cout << out << ")\n";
+}
+
+void ClusterInstance::DumpFuncUnitInstantiations() {
+  for (auto *fu : fu_instantiations_) {
+    std::cout << "\nFunc_unit: " << fu->func_type()->name()
+              << "---------------------------------------\n";
+    fu->DumpFuncUnitInstantiation();
+  }
+}
+
+// Print details of all functional unit instantiations.
+void MdlSpec::DumpFuncUnitInstantiations() {
+  for (const auto *cpu : cpus())
+    for (auto *cluster : *cpu->clusters())
+      cluster->DumpFuncUnitInstantiations();
+}
+
+// Print details of a single subunit instantiation.
+void SubUnitInstantiation::DumpSubUnitInstantiation() {
+  auto out =
+      formatv("{0}: {{{1}} {2} {3} <{4}>(", func_unit()->cpu()->name(),
+              func_unit()->cluster()->name(), func_unit()->func_type()->name(),
+              func_unit()->instance()->name(), subunit()->name());
+
+  if (ResourceRefList *args = subunit()->args()) {
+    int params = std::min(su_template()->params()->size(), args->size());
+    for (int argid = 0; argid < params; argid++) {
+      if ((*su_template()->params())[argid]->IsResource())
+        out += GetResourceArg(argid)->ToString();
+      else
+        out += GetPortArg(argid)->ToString();
+      if (argid < params - 1)
+        out += ", ";
+    }
+  }
+  out += ")\n";
+
+  for (auto *ref : references())
+    out += formatv("  {0}\n", ref->ToString());
+
+  std::cout << out;
+}
+
+// Print details of all subunit instantiations.
+void MdlSpec::DumpSubUnitInstantiations() {
+  // Dump out all instantiations for each subunit.
+  for (const auto &subunit : su_instantiations()) {
+    auto [name, unit] = subunit;
+    std::cout << formatv(
+        "\nSubunit: {0} ---------------------------------------\n", name);
+    for (auto *su : *unit)
+      su->DumpSubUnitInstantiation();
+  }
+}
+
+// Print details of a single latency instantiation.
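+// Each instantiation is printed in the form (names illustrative):
+//     cpu: {cluster} fu_type fu_name <subunit>[latency](bound args...)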
+void LatencyInstantiation::DumpLatencyInstantiation() { + auto out = formatv("{0}: {{{1}} {2} {3} <{4}>[{5}](", + subunit()->func_unit()->cpu()->name(), + subunit()->func_unit()->cluster()->name(), + subunit()->func_unit()->func_type()->name(), + subunit()->func_unit()->instance()->name(), + subunit()->subunit()->name(), latency()->name()); + + if (ResourceRefList *args = latency()->args()) { + int params = std::min(lat_template()->params()->size(), args->size()); + for (int argid = 0; argid < params; argid++) { + if ((*lat_template()->params())[argid]->IsResource()) + out += GetResourceArg(argid)->ToString(); + else + out += GetPortArg(argid)->ToString(); + if (argid < params - 1) + out += ", "; + } + } + std::cout << out << ")\n"; +} + +void MdlSpec::DumpPredicates() { + for (const auto &[name, expr] : predicate_table_) + std::cout << formatv("Predicate {0} : {1}\n\n", name, expr->ToString(0)); +} + +// Format a string that represents the ids associated with a resource. +std::string ResourceDef::resource_format() { + int id = get_resource_id(); + std::string out = formatv("{0} : ", debug_name()); + + if (!IsGroupDef() && pool_size() <= 1) + out += std::to_string(id); + if (IsGroupDef()) { + out += "["; + for (auto *mem : member_defs()) + out += std::to_string(mem->get_resource_id()) + ","; + out += "]"; + } + if (pool_size() > 1) + out += formatv("[{0}..{1}]", id, pool_size() + id - 1); + + return out; +} + +std::string SubPool::ToString() const { + if (first() == -1 && last() == -1) + return "[group]"; + return formatv("subrange: [{0}..{1}]", first(), last()); +} + +// Write out all allocation pools associate with a subpool. +std::string SubPoolInfo::ToString(std::string subpool) const { + std::string out; + int pool_id = subpool_id(); + + for (auto rit = counts().rbegin(); rit != counts().rend(); rit++) + out += formatv(" Subpool:{0} size:{1} {2}\n", pool_id++, *rit, subpool); + return out; +} + +// Dump resource ids for each resource. +void MdlSpec::DumpResourceIds() { + std::string out; + for (auto *cpu : cpus()) { + out += formatv("\nResources defined for '{0}' " + "---------------------------------------\n", + cpu->name()); + for (auto res : cpu->all_resources()) + out += formatv("{0}{1}\n", res->resource_format(), res->ref_summary()); + + out += formatv("\nPooled resources defined for '{0}' " + "--------------------------------\n", + cpu->name()); + + for (auto *res : cpu->pool_resources()) + if (!res->alloc_sizes().empty()) { + out += formatv("{0}{1}\n", res->resource_format(), res->ref_summary()); + for (auto &[subpool, info] : res->sub_pools()) + out += info.ToString(subpool.ToString()); + } + } + + // Write out the string! + std::cout << out; +} + +} // namespace mdl +} // namespace mpact diff --git a/llvm/utils/MdlCompiler/mdl_generate.h b/llvm/utils/MdlCompiler/mdl_generate.h new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/mdl_generate.h @@ -0,0 +1,275 @@ +//===- mdl_generate.h - Objects for generate the MDL database -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains class definitions used to build the machine description +// database. 
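+// The main classes are ResourceEvent (a single resource reference),
+// InstrInfo (the references for one instruction/subunit instantiation pair),
+// and InstructionDatabase (the complete per-target instruction database).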
+// +//===----------------------------------------------------------------------===// + +#ifndef MDL_COMPILER_MDL_GENERATE_H_ +#define MDL_COMPILER_MDL_GENERATE_H_ + +#include +#include +#include +#include +#include + +#include "mdl.h" + +namespace mpact { +namespace mdl { + +class InstrInfo; +class ResourceEvent; +struct TargetDataBase; + +using ResourceList = std::vector; +using ResourceSets = std::vector>; +using InstrInfoList = std::vector; + +constexpr int kOneCycle = 1; + +//----------------------------------------------------------------------------- +// A description of a single resource reference. +//----------------------------------------------------------------------------- +class ResourceEvent { +public: + ResourceEvent(RefType type, PhaseExpr *expr, int use_cycles, ResourceRef *res, + Reference *ref = nullptr, + SubUnitInstantiation *subunit = nullptr) + : ref_type_(type), phase_expr_(expr), use_cycles_(use_cycles), + resource_(res), reference_(ref), subunit_(subunit) { + res->definition()->RecordReference(type, expr, res, ref, subunit); + SetConstantPhase(); + } + ResourceEvent(RefType type, PhaseExpr *expr, ResourceRef *res, + Reference *ref = nullptr, + SubUnitInstantiation *subunit = nullptr) + : ref_type_(type), phase_expr_(expr), use_cycles_(kOneCycle), + resource_(res), reference_(ref), subunit_(subunit) { + res->definition()->RecordReference(type, expr, res, ref, subunit); + SetConstantPhase(); + } + ResourceEvent(RefType type, PhaseExpr *expr, ResourceDef *port) + : ref_type_(type), phase_expr_(expr), resource_(new ResourceRef(port)) { + port->RecordReference(type, expr, nullptr, nullptr, nullptr); + SetConstantPhase(); + } + // Constructor for an fus reference, including micro_ops. + ResourceEvent(RefType type, PhaseExpr *expr, int use_cycles, ResourceRef *res, + int micro_ops, bool is_buffered, RefFlags::Item fu_flags) + : ref_type_(type), phase_expr_(expr), use_cycles_(use_cycles), + resource_(res), micro_ops_(micro_ops), is_buffered_(is_buffered), + fu_flags_(fu_flags) { + res->definition()->RecordReference(type, expr, res, nullptr, nullptr); + } + + RefType ref_type() const { return ref_type_; } + bool IsFuncUnitRef() const { return ref_type_ == RefTypes::kFus; } + PhaseExpr *phase_expr() const { return phase_expr_; } + int use_cycles() const { return use_cycles_; } + int micro_ops() const { return micro_ops_; } + bool is_buffered() const { return is_buffered_; } + RefFlags::Item fu_flags() const { return fu_flags_; } + ResourceRef *resource() const { return resource_; } + Reference *reference() const { return reference_; } + SubUnitInstantiation *subunit() const { return subunit_; } + void SetConstantPhase() { phase_value_ = phase_expr_->ConstantPhase(); } + + // Resource references are sorted by pipeline phase, then by resource id. + // If the pipeline phase is non-constant, its ordered last. If both are + // non-constant, use the formatting string to decide (so that the sort + // is stable). 
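+  // For example, an event at constant phase 2 orders before one at phase 5,
+  // any event whose phase depends on an operand value orders after all
+  // constant-phase events, and ties between constant phases are broken by
+  // the referenced resource's id.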
+ bool operator<(const ResourceEvent &rhs) const { + if (phase_value_ != rhs.phase_value_) { + if (phase_value_ == -1) + return false; + if (rhs.phase_value_ == -1) + return true; + return phase_value_ < rhs.phase_value_; + } + if (phase_value_ == -1 && rhs.phase_value_ == -1) + return phase_expr_->ToString() < rhs.phase_expr_->ToString(); + + return resource_->get_final_resource_id() < + rhs.resource()->get_final_resource_id(); + } + bool operator>(const ResourceEvent &rhs) const { return rhs < *this; } + + std::string ToString() const { + return formatv("{0}{1}({2},{3})", RefTypeToString(ref_type()), + phase_expr_->FormatProtection(), phase_expr_->ToString(), + resource_->ToString()); + } + +private: + RefType ref_type_; // type of reference + int phase_value_ = -1; // value of phase if constant expression + PhaseExpr *phase_expr_ = nullptr; // when reference happens + int use_cycles_ = 1; // # cycles resource is used + ResourceRef *resource_; // referenced resource + int micro_ops_ = 0; // micro_ops (for fus) + bool is_buffered_ = false; // true if fu has a reservation queue + RefFlags::Item fu_flags_; // various flags for explicit fu refs + Reference *reference_ = nullptr; // pointer to original reference + SubUnitInstantiation *subunit_; // pointer to subunit instantiation context +}; + +//----------------------------------------------------------------------------- +// A description of operand and resource references for a single instruction +// and subunit instantiation pair. +//----------------------------------------------------------------------------- +class InstrInfo { +public: + InstrInfo(InstructionDef *instruct, SubUnitInstantiation *subunit, + ResourceList &resources, ReferenceList *refs, + ReferenceList &resource_refs) + : instruct_(instruct), subunit_(subunit), references_(refs), + resources_(resources), resource_refs_(resource_refs) {} + + void CheckUnreferencedOperands(bool check_all_operands); + ReferenceList *references() const { return references_; } + ResourceList &resources() { return resources_; } + ReferenceList &resource_refs() { return resource_refs_; } + SubUnitInstantiation *subunit() const { return subunit_; } + InstructionDef *instruct() const { return instruct_; } + std::string ToString() const; + void dump() const { std::cout << ToString() << "\n"; } + +private: + InstructionDef *instruct_; // pointer to the instruction description + SubUnitInstantiation *subunit_; // which subunit instance + + ReferenceList *references_; // valid references for this instruction + ResourceList resources_; // sets of resource references + ReferenceList resource_refs_; // conditional resources and FUs +}; + +//----------------------------------------------------------------------------- +// Everything we know about the target's instructions. +// Constructing this object creates the entire database, which is stored in +// the contained map. 
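+// The map is keyed by instruction name; each entry holds the InstrInfo
+// records generated for every subunit instantiation and resource combination
+// the instruction can use.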
+//----------------------------------------------------------------------------- +class InstructionDatabase { +public: + InstructionDatabase(std::string directory_name, std::string file_name, + bool gen_missing_info, MdlSpec &spec); + void GenerateInstructionInfo(InstructionDef *instruct); + ResourceSets BuildResourceSets(ResourceList &resources, + SubUnitInstantiation *subunit); + + void FindReferencedOperands(const InstructionDef *instr, ReferenceList *refs, + CpuInstance *cpu, std::set &found); + void FindCondReferencedOperands(const InstructionDef *instr, + ConditionalRef *cond, CpuInstance *cpu, + std::set &found); + void AddUnreferencedOperandDefs(const InstructionDef *instr, + ReferenceList *refs, CpuInstance *cpu); + + // Check all instruction records for operands that don't have explicit + // references referring to them - these are likely errors. + void CheckUnreferencedOperands(bool check_all_operands) { + for (auto &[name, info_set] : instruction_info_) + for (auto *info : info_set) + info->CheckUnreferencedOperands(check_all_operands); + } + + // Given a Reference operand, determine if it is valid for this instruction. + // If the reference operand is null, its always valid. + // Return true if its valid. + bool IsOperandValid(const InstructionDef *instr, const OperandRef *opnd, + RefType ref_type) const { + if (opnd == nullptr) + return true; + int op_index = spec_.GetOperandIndex(instr, opnd, ref_type); + if (op_index == -1) + return false; + + // For holds and reserves, we don't have to check the reference type. + int iref_type = static_cast(ref_type); + if ((iref_type & RefTypes::kAnyUseDef) == 0) + return true; + + // If the reference is any use or def, make sure it matches the type of the + // operand declaration in the instruction. Input operands must be "used", + // and output operands must be "defed". + // Occasionally td files give input and output operands the same name/type + // (in different instructions), and latency rules must provide "defs" and + // "uses" for those operands, but we don't have an obvious way to decide + // whether a particular def or use matches an operand reference. So we use + // an operand's I/O designator to differentiate. (These are -always- there + // for definitions scraped from llvm). If an operand doesn't have an I/O + // designator, we can skip this check. + auto *op = instr->GetOperandDecl(op_index); + if (op == nullptr) + return true; + if (op->is_input() && (iref_type & RefTypes::kAnyUse) == 0) + return false; + if (op->is_output() && (iref_type & RefTypes::kAnyDef) == 0) + return false; + return true; + } + + // Look for operand references in phase expressions, and make sure the + // operand exists in the current instruction. + // Return true if the expression is valid. + bool IsPhaseExprValid(const InstructionDef *instr, + const PhaseExpr *expr) const { + if (!expr) + return true; + if (expr->operation() == kOpnd) + return IsOperandValid(instr, expr->operand(), RefTypes::kNull); + return IsPhaseExprValid(instr, expr->left()) && + IsPhaseExprValid(instr, expr->right()); + } + + // Return true if this reference is valid for this instruction. + // - If it has an operand reference, then check that the instuction + // definition has that operand. + // - If the phase expression contains operand references, check them too. 
+ bool IsReferenceValid(const InstructionDef *instr, + const Reference *reference) const { + return IsOperandValid(instr, reference->operand(), reference->ref_type()) && + IsPhaseExprValid(instr, reference->phase_expr()); + } + + // Top level function for checking a set of reference predicates against + // a particular instruction definition. + ReferenceList *FilterReferences(const InstructionDef *instr, + ReferenceList &candidates, CpuInstance *cpu); + // Filter a single conditional reference. Simplify if the predicate + // evaluates to true or false. + ConditionalRef *FilterConditionalRef(const InstructionDef *instr, + ConditionalRef *cond, CpuInstance *cpu); + + MdlSpec &spec() { return spec_; } + auto &instruction_info() { return instruction_info_; } + bool gen_missing_info() const { return gen_missing_info_; } + + // Write everything out to the C++ output file. + void Write(bool generate_llvm_defs); + + // Dump everything we know about all the target instructions. + void DumpInstructions(); + + std::string file_name() const { return file_name_; } + std::string directory_name() const { return directory_name_; } + +private: + std::string directory_name_; // output directory name + std::string file_name_; // original mdl filename + bool gen_missing_info_; // reflects command line option of same name + MdlSpec &spec_; // machine description specification + std::map instruction_info_; +}; + +} // namespace mdl +} // namespace mpact + +#endif // MDL_COMPILER_MDL_GENERATE_H_ diff --git a/llvm/utils/MdlCompiler/mdl_generate.cpp b/llvm/utils/MdlCompiler/mdl_generate.cpp new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/mdl_generate.cpp @@ -0,0 +1,654 @@ +//===- mdl_generate.cpp - Generate the mdl database -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These functions perform the second half of the architecture expansion, +// where we specialize each generated subunit instance for each instruction. +// From this, we can generate the internal representation of the complete +// database for the spec. +// +// The general algorithm for this is: +// +// for each llvm instruction description: +// for each subunit it qualifies for: +// for each specialized instance of that subunit: +// further specialize the subunit for the current instruction +// add the final specialized subunit to the instruction definition +// +// After this pass, each instruction will have a set of subunit instances, +// each of which has the following information: +// - a cpu and functional unit combination the instruction can run on, +// - the resources it uses (on that cpu/functional unit), and when, +// - any resource pool requirements for the instruction, +// - the latencies of all operand reads and writes, +// - any CPU and/or Functional unit register constraints imposed on the ins. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "mdl_output.h" + +namespace mpact { +namespace mdl { + +// Check that each operand in the instruction is mentioned in at least +// one reference record. Unmentioned operands are a likely error. 
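+// This check is driven by the --check_usage flag in mdl_main.cpp; the
+// --check_all_operands flag extends it to non-register operands as well.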
+void InstrInfo::CheckUnreferencedOperands(bool check_all_operands) { + std::set referenced_operands; + for (auto *ref : *references_) + if (ref->operand()) + referenced_operands.insert(ref->operand()->operand_index()); + + for (unsigned op_id = 0; op_id < instruct_->flat_operands()->size(); op_id++) + if ((check_all_operands || instruct_->GetOperandDecl(op_id)->reg_class()) && + !referenced_operands.count(op_id)) + subunit_->WarningLog( + instruct_, "Operand{0}in instruction \"{1}\" is unreferenced", + StringVec(instruct_->GetOperandDecl(op_id)->op_names(), " \"", ".", + "\" "), + instruct_->name()); +} + +// Stringify a InstrInfo object, suitable for writing debug information. +std::string InstrInfo::ToString() const { + std::string out = formatv("{0} Subunit: {1}.{2}\n", instruct_->ToString(), + subunit_->func_unit()->cpu()->name(), + subunit_->func_unit()->instance()->name()); + + out += " Operand references:\n"; + for (auto *ref : *references_) + if ((ref->operand() && ref->IsOperandRefType()) || ref->IsConditionalRef()) + out += formatv(" ===> {0}\n", ref->ToString()); + + if (!resource_refs_.empty()) + out += " FU references:\n"; + for (auto *ref : resource_refs_) + if (ref->IsFuncUnitRef()) + out += formatv(" ---> {0}\n", ref->ToString()); + + if (resources_.empty()) + return out; + out += " Resources:\n"; + for (auto &res : resources_) + if (!res.resource()->HasCount()) + out += formatv(" {0}\n", res.ToString()); + + out += " Pool Resources:\n"; + for (auto &res : resources_) + if (res.resource()->HasCount()) { + out += formatv(" {0} ", res.ToString()); + SubPool subpool(res.resource()); + auto &subpool_info = res.resource()->definition()->sub_pool(subpool); + out += formatv(" subpool id: {0}", subpool_info.subpool_id()); + out += " size requests: "; + auto comma = ""; + for (auto request : subpool_info.counts()) { + out += formatv("{0}{1}", comma, request); + comma = ","; + } + out += "\n"; + } + + out += " Architectural Register Constraints:\n"; + for (auto *ref : *references_) + if (auto *opnd = ref->operand()) + if (auto *port = ref->port()) + if (auto *reg_class = port->reg_class()) + out += formatv(" operand {0}: {1}\n", + opnd->operand_index(), reg_class->name()); + return out; +} + +//---------------------------------------------------------------------------- +// Find all functional-unit-related resources associated with this subunit +// instance, including the implied functional unit resource (and its bases). +// Note that we don't include any explicit functional units specified by an +// fus() clause. +//---------------------------------------------------------------------------- +void GetFuncUnitResources(SubUnitInstantiation *subunit, ResourceSets &res_set, + PhaseName *phase) { + // Add all the implicit functional unit resources, including parent fu ids. + // Note: Each fu is added as an independent resource in the res set. + auto fu_resources = subunit->GetFuncUnitResources(); + for (auto *def : fu_resources) { + // Don't write out catchall units. + if (is_catchall_name(def->name())) + continue; + auto *fu = new ResourceRef(def); + ResourceEvent fu_res(RefTypes::kUse, new PhaseExpr(phase), 1, fu); + std::vector items{fu_res}; + res_set.push_back(items); + } +} + +//---------------------------------------------------------------------------- +// Build a resource set for this instruction instance. 
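+// A ResourceSets object is a vector of alternative lists: each inner vector
+// holds the interchangeable ResourceEvents that can satisfy one requirement
+// (for example, the candidate issue slots for a functional unit instance).
+// BuildResourceCombos() later expands the cross product of these lists into
+// concrete candidate resource assignments.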
+//---------------------------------------------------------------------------- +ResourceSets +InstructionDatabase::BuildResourceSets(ResourceList &resources, + SubUnitInstantiation *subunit) { + ResourceSets res_set; + for (auto &item : resources) { + std::vector items; + + // Resource pools can be managed along with "Normal" resources, or + // separately. Its more computationally expensive at compile time to manage + // them with other resources ((N*P)^2 vs (N^2 + P^2)), but vastly more + // convenient to consider them together. So if N+P is "small enough", we + // should just add pooled resources in with the normal resources (which the + // code below will accomplish). But currently I don't think thats typically + // the case, so for now I've just commented this (working) code out. We'll + // get much fewer resource sets, and have to model all pools in the compiler + // at compile time (which will typically be much faster). + // TODO(tbd) - Determine what kinds of pools this makes sense for. +#if 0 + // Handle resource pools. + ResourceRef *ref = item.resource(); + if (ref->IsPool() && !has_shared_bits() && + (ref->last() - ref->first()) < 4 && + (ref->HasCount() == 1 || ref->operand_index() == -1)) { + for (int id = ref->first(); id <= ref->last(); id += ref->pool_count()) { + auto *newref = new ResourceRef(*ref); + newref->set_subrange(id, id + ref->pool_count() - 1); + newref->set_pool_count(-1); + ResourceEvent newevent(item.ref_type(), item.phase_expr(), newref); + items.push_back(newevent); + } + res_set.push_back(items); + continue; + } +#endif +#if 0 + // Handle resource groups. + if (item.resource()->IsGroupRef() && !has_shared_bits() && + item.resource()->definition()->members().size() < 4) { + for (auto mem : *item.resource()->definition()->member_defs()) { + auto *newref = new ResourceRef(*mem); + ResourceEvent newevent(item.ref_type(), item.phase_expr(), newref); + items.push_back(newevent); + } + res_set.push_back(items); + continue; + } +#endif + + // Handle single resource items. + items.push_back(item); + res_set.push_back(items); + } + + // We need to identify a "default" phase to use for implicit resources. + auto *phase0 = spec_.FindFirstPhase(); + + // Add all the implicit functional unit resources, including parent fu ids. + // Note: Each fu is added as an independent resource in the res set. + GetFuncUnitResources(subunit, res_set, phase0); + + // If we determined we don't need to model resource slots, we're done. + auto *cpu = subunit->cpu(); + if (!cpu->needs_slot_resources()) + return res_set; + + // Add "any" slot resources associated with this functional unit instance. + // Note: we add all of them as a single pooled entry in the res_set. + auto *slots_any = subunit->GetSlotResourcesAny(); + if (slots_any && !slots_any->empty()) { + std::vector items; + for (auto *pin_any : *slots_any) { + auto *slot = new ResourceRef(*pin_any); + auto *phase = phase0; + if (slot->definition()->start_phase()) + phase = spec_.FindPipeReference(slot->definition()->start_phase(), cpu); + items.emplace_back(RefTypes::kUse, new PhaseExpr(phase), slot); + } + res_set.push_back(items); + } + + // Add "all" slot resources associated with this functional unit instance. + // Note: Each slot is added as an independent resource in the res_set. + // TODO: we might want to try just doing pooled allocations for these - all + // the mechanism exists, we just need to create a pooled allocation + // ResourceEvent, and the backend does the rest. 
+ auto *slots_all = subunit->GetSlotResourcesAll(); + if (slots_all && !slots_all->empty()) { + for (auto *pin_all : *slots_all) { + auto *slot = new ResourceRef(*pin_all); + auto *phase = phase0; + if (slot->definition()->start_phase()) + phase = spec_.FindPipeReference(slot->definition()->start_phase(), cpu); + ResourceEvent pin_res(RefTypes::kUse, new PhaseExpr(phase), slot); + std::vector items{pin_res}; + res_set.push_back(items); + } + } + + return res_set; +} + +//---------------------------------------------------------------------------- +// Build a set of all possible resource combinations found in the input +// resource set. +//---------------------------------------------------------------------------- +void BuildResourceCombos(ResourceSets &res_set, unsigned index, + std::vector ¤t, + ResourceSets &result) { + if (index == res_set.size()) { + result.push_back(current); + return; + } + + for (auto &resource : res_set[index]) { + current.push_back(resource); + BuildResourceCombos(res_set, index + 1, current, result); + current.pop_back(); + } +} + +//---------------------------------------------------------------------------- +// Annotate phase expressions with instruction-specific operand information. +//---------------------------------------------------------------------------- +void AnnotatedPhaseExpr(const InstructionDef *instr, PhaseExpr *expr, + MdlSpec &spec, CpuInstance *cpu) { + if (expr->operation() == kOpnd) { + int index = spec.GetOperandIndex(instr, expr->operand(), RefTypes::kUse); + expr->operand()->set_operand_index(index); + expr->operand()->set_operand_decl(instr->GetOperandDecl(index)); + return; + } + if (expr->operation() == kPhase) { + if (spec.SpecializePhaseExpr(expr, cpu) && !expr->IsExpressionLegal()) + spec.ErrorLog(expr, "Invalid phase expression: divide by zero"); + return; + } + + if (expr->left()) + AnnotatedPhaseExpr(instr, expr->left(), spec, cpu); + if (expr->right()) + AnnotatedPhaseExpr(instr, expr->right(), spec, cpu); +} + +//---------------------------------------------------------------------------- +// Annotate a reference with instruction-specific operand information. +//---------------------------------------------------------------------------- +Reference *AnnotatedReference(const InstructionDef *instr, Reference *ref, + int delay, MdlSpec &spec, CpuInstance *cpu) { + auto *newref = new Reference(*ref, delay); // Make a private copy. + ref->set_used(); // Note that we used this reference. + if (newref->operand()) { + int index = spec.GetOperandIndex(instr, newref->operand(), ref->ref_type()); + newref->operand()->set_operand_index(index); + newref->operand()->set_operand_decl(instr->GetOperandDecl(index)); + } + + AnnotatedPhaseExpr(instr, newref->phase_expr(), spec, cpu); + newref->SetConstantPhase(); // Evaluate constant phase expressions. + return newref; +} + +// Return true if two register class have any common registers. +bool classes_overlap(const RegisterDefList *a, const RegisterDefList *b) { + for (auto *item_a : *a) + for (auto *item_b : *b) + if (item_a->name() == item_b->name()) + return true; + return false; +} + +// Return true if any of any instruction's port constraints are incompatible +// with operand constraints. This is a nice optimization to prune subunits +// whose port constraints are incompatible with an instruction's operand +// constraints. It is ok to be conservative. We skip conditional references +// since the predicates could impact whether a reference is used or not. 
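+// For example (class names are illustrative only): if a port is constrained
+// to a register class containing only A0..A15 and the corresponding operand
+// is declared with a class containing only B0..B15, the two classes cannot
+// overlap, so this subunit instance is discarded for the instruction.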
+bool HasIncompatibleConstraints(const ReferenceList *references) { + for (const auto *ref : *references) + if (!ref->IsConditionalRef()) + if (const auto *opnd = ref->operand()) + if (const auto op_decl = opnd->operand_decl()) + if (const auto *op_class = op_decl->reg_class()) + if (const auto *port = ref->port()) + if (const auto *pclass = port->reg_class()) + if (!classes_overlap(pclass->members(), op_class->members())) + return true; + return false; +} + +// Filter conditional references, and return a filtered copy. +// If there's no predicate, just filter the refs (this is an "else" clause). +// if there's a predicate, evaluate it: +// - if its true, filter and return its associated reference list. +// - if its false, filter and return its "else" clause (if there is one) +// - if it cannot be fully evaluated, create a copy with filtered references +// and else clause. +ConditionalRef *InstructionDatabase::FilterConditionalRef( + const InstructionDef *instr, ConditionalRef *cond, CpuInstance *cpu) { + // If the entire clause is missing, just return nullptr. + if (cond == nullptr) + return nullptr; + auto *pred = cond->predicate(); + // If the predicate is missing, it evaluates to True, so return filtered refs. + if (pred == nullptr) + return new ConditionalRef( + *cond, nullptr, FilterReferences(instr, cond->refs(), cpu), nullptr); + + // Evaluate the predicate, and if it is True or False, we can simplify. + auto *value = spec_.EvaluatePredicate(pred->name(), instr); + if (value->IsTrue()) + return new ConditionalRef( + *cond, nullptr, FilterReferences(instr, cond->refs(), cpu), nullptr); + if (value->IsFalse()) + return FilterConditionalRef(instr, cond->else_clause(), cpu); + + // If we can't completely evaluate the predicate, create a copy, filter its + // references, and recur on its else clause. + auto *then_clause = FilterReferences(instr, cond->refs(), cpu); + auto *else_clause = FilterConditionalRef(instr, cond->else_clause(), cpu); + auto *newcond = new ConditionalRef(*cond, pred, then_clause, else_clause); + newcond->SetInstrPredicate(value); + return newcond; +} + +// Given a list of references, determine if each reference is valid for the +// specified instruction. Return a filtered list of references. +ReferenceList *InstructionDatabase::FilterReferences( + const InstructionDef *instr, ReferenceList &candidates, CpuInstance *cpu) { + auto *refs = new ReferenceList; + + for (auto *ref : candidates) { + ref->set_seen(); // Note that we've seen a possible reference to this. + + // If it's not a conditional reference, check to see if its valid for + // this instruction, and if it is valid add to the reference list. + // Expand WriteSequences to discrete references. + if (!ref->IsConditionalRef()) { + if (IsReferenceValid(instr, ref)) { + refs->push_back(AnnotatedReference(instr, ref, 0, spec_, cpu)); + int delay = ref->delay(); + for (int repeat = 1; repeat < ref->repeat(); repeat++) { + refs->push_back(AnnotatedReference(instr, ref, delay, spec_, cpu)); + delay += ref->delay(); + } + } + continue; + } + + // Recursively filter each conditional reference. If the result is a + // single unconditional ConditionalRef object, just add all of its + // references to the list. Otherwise, add the conditional reference to + // the list. 
+ auto *cond = FilterConditionalRef(instr, ref->conditional_ref(), cpu); + if (cond == nullptr) + continue; + if (cond->predicate() != nullptr) + refs->push_back(new Reference(*cond, nullptr, cond)); + else + refs->insert(refs->end(), cond->refs().begin(), cond->refs().end()); + } + + return refs; +} + +//---------------------------------------------------------------------------- +// If there are unreferenced output operands in an instruction, we need at +// least one Def in the database to use for the default latency. This is +// a hack to handle cases where LLVM has unmentioned defs or dynamically adds +// defs to an instruction instance. Scan a reference list looking for the +// kFu with the largest latency, and create a default def for that list. +// These will provide the compiler with "default" def phases. +// Return the number of default defs inserted. +//---------------------------------------------------------------------------- +int AddDefaultDefs(ReferenceList &refs, CpuInstance *cpu, MdlSpec &spec) { + ReferenceList defs; + int count = 0; + + // Scan conditional reference list for defs. + for (auto *ref : refs) { + for (auto *cond = ref->conditional_ref(); cond; cond = cond->else_clause()) + count += AddDefaultDefs(cond->refs(), cpu, spec); + } + + // Scan the references looking for the latest Def or Use. If no defs are + // found, add a default def. + Reference *latest = nullptr; + Reference *latest_def = nullptr; + int latest_latency = -1; + + for (auto *ref : refs) { + if (ref->IsDef() || ref->IsUse()) { + if (ref->phase_expr()->IsExpressionConstant()) { + int latency = ref->phase_expr()->EvaluateConstantExpression(); + if (latest == nullptr || latency > latest_latency || + (latency == latest_latency && latest->IsDef())) { + latest = ref; + latest_latency = latency; + } + } + } + } + + // If we haven't seen a def, create a default reference, either the latest + // Use, or the first execute phase for the CPU. + if (latest_def == nullptr) { + auto *opnd = new OperandRef(""); + if (latest) + refs.push_back(new Reference(RefTypes::kDef, latest->phase_expr(), opnd)); + else + refs.push_back( + new Reference(RefTypes::kDef, spec.FindFirstExecutePhase(cpu), opnd)); + count++; + } + return count; +} + +//---------------------------------------------------------------------------- +// Given an instruction and a reference list, create a set of referenced +// operand indexes. +//---------------------------------------------------------------------------- +void InstructionDatabase::FindCondReferencedOperands( + const InstructionDef *instr, ConditionalRef *cond, CpuInstance *cpu, + std::set &found) { + if (cond == nullptr) + return; + FindReferencedOperands(instr, &cond->refs(), cpu, found); + FindCondReferencedOperands(instr, cond->else_clause(), cpu, found); +} + +void InstructionDatabase::FindReferencedOperands(const InstructionDef *instr, + ReferenceList *refs, + CpuInstance *cpu, + std::set &found) { + if (refs == nullptr) + return; + for (const auto *ref : *refs) { + if (ref->IsConditionalRef()) + FindCondReferencedOperands(instr, ref->conditional_ref(), cpu, found); + else if (ref->operand() != nullptr) + found.insert(ref->operand()->operand_index()); + } +} + +//---------------------------------------------------------------------------- +// Find unreferenced output operands, and create default references for them. 
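+// Each register-based output operand that no reference mentions gets a
+// default Def (see AddDefaultDefs above) so that a worst-case write phase is
+// still modeled for it.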
+//---------------------------------------------------------------------------- +void InstructionDatabase::AddUnreferencedOperandDefs( + const InstructionDef *instr, ReferenceList *refs, CpuInstance *cpu) { + // First find all the referenced operands. + std::set referenced_opnds; + FindReferencedOperands(instr, refs, cpu, referenced_opnds); + + // Find register operands that have no references, create a vector of them. + std::vector opnds; + for (unsigned op_id = 0; op_id < instr->flat_operands()->size(); op_id++) + if (referenced_opnds.count(op_id) == 0) { + auto *opnd = instr->GetOperandDecl(op_id); + if (opnd->is_input()) + continue; + auto *back = opnd->types()->back(); + auto *front = opnd->types()->front(); + + // See if the operand declaration is a register or a register class. + if (auto *rclass = FindItem(spec_.reg_classes(), back->name())) { + opnds.push_back(new OperandRef(front, opnd->op_names(), rclass, op_id)); + } else if (FindItem(spec_.registers(), back->name()) != nullptr) { + opnds.push_back(new OperandRef(nullptr, new IdList(1, back), op_id)); + } + } + + // If we found unreferenced output operands, add "default" defs to + // represent the worst-case pipeline phase for unspecified defs. + // If only one default operand was added, and it's the last item in the + // reference list, just use its latency and remove it from the list. + PhaseExpr *def_latency = nullptr; + if (!opnds.empty() && AddDefaultDefs(*refs, cpu, spec_) == 1 && + refs->back()->IsDefaultOperandRef()) { + def_latency = refs->back()->phase_expr(); + refs->pop_back(); + } else { + def_latency = PhaseExpr::DefaultLatency(); + } + + // We found unreferenced register-based output operands, so create a + // references for them. + for (auto *opnd : opnds) + refs->push_back(new Reference(RefTypes::kDef, def_latency, opnd)); +} + +//---------------------------------------------------------------------------- +// Generate all instruction information records for a target instruction. +// Instructions can have more than one subunit. If so, instantiate them all. +//---------------------------------------------------------------------------- +void InstructionDatabase::GenerateInstructionInfo(InstructionDef *instr) { + // For each subunit, create reference records for this instruction. + for (auto *subunit : *instr->subunits()) + for (auto *unit : *spec_.su_instantiations()[subunit->name()]) { + // Mark this subunit as used. + spec().su_map()[subunit->name()]->inc_use(); + + // Create a list of valid references for this subunit. + // Check each reference to see if its valid for this instruction. + auto *cpu = unit->cpu(); + auto *refs = FilterReferences(instr, unit->references(), cpu); + + // Check each reference for incompatible constraints imposed + // by ports. These are not valid subunits, and we don't want to add + // this subunit instance to the database. + if (HasIncompatibleConstraints(refs)) + continue; + + // Sort the references by pipeline phase. This is primarily to order + // operand references by type and phase for cosmetic reasons. + std::stable_sort( + refs->begin(), refs->end(), + [](const Reference *a, const Reference *b) { return *a < *b; }); + + // Add defs for unreferenced register operand defs. This isn't + // necessary, so its currently disabled. + // AddUnreferencedOperandDefs(instr, refs, cpu); + + // Given a list of validated references, create a list of events for + // unconditional resource references. 
At this point, we don't add + // FUs and conditional refs - these are added later for each combination + // of unconditional resource refs. + // For each port reference, add it and its associated resources. + // For each pooled reference, annotate it with its operand index. + // For resources associated with operands: + // - they are always "used". + // - tag the resource reference with its associate operand index. + // - If the resource has a defined cycle id, use it. + ReferenceList resource_refs; + ResourceList resources; + for (auto *ref : *refs) { + // Don't add functional unit and conditional references, just add them + // to the resource reference list for this instruction/subunit. + if (ref->IsFuncUnitRef() || + (ref->IsConditionalRef() && + ref->conditional_ref()->HasResourceRefs())) { + resource_refs.push_back(ref); + continue; + } + // Add all other resource references. + auto ref_type = ref->AdjustResourceReferenceType(); + for (auto *res : *ref->resources()) { + if (!res->IsNull()) { + PhaseExpr *phase = ref->phase_expr(); + + // If the resource definition has a specified phase, use it instead. + if (auto *start = res->definition()->start_phase()) + phase = new PhaseExpr(spec_.FindPipeReference(start, cpu)); + if (ref->operand()) + res->set_operand_index(ref->operand()->operand_index()); + resources.emplace_back(ref_type, phase, ref->use_cycles(), res, ref, + unit); + } + } + } + + // Create sets of reference resource combinations. + ResourceSets res_set = BuildResourceSets(resources, unit); + ResourceList current; + ResourceSets resource_combos; + BuildResourceCombos(res_set, 0, current, resource_combos); + + //---------------------------------------------------------------------- + // AND FINALLY: For the current instruction, for each subunit, for each + // resource combination, create an instruction record that captures all + // of this information and add it to the instruction database. + //---------------------------------------------------------------------- + for (auto &res : resource_combos) { + auto *new_inst = new InstrInfo(instr, unit, res, refs, resource_refs); + instruction_info_[instr->name()].push_back(new_inst); + } + } +} + +//---------------------------------------------------------------------------- +// Dump everything we know about all the instructions. +//---------------------------------------------------------------------------- +void InstructionDatabase::DumpInstructions() { + std::cout << "\n---------------------------------------------------------\n"; + std::cout << " Instruction info for \"" << file_name_ << "\""; + std::cout << "\n---------------------------------------------------------\n"; + + // Debug: dump out all the instruction information we've generated. + for (auto &instruct_list : instruction_info_) + for (auto &instruct : instruct_list.second) + instruct->dump(); +} + +//---------------------------------------------------------------------------- +// Start the process of generating the final instruction information. +//---------------------------------------------------------------------------- +InstructionDatabase::InstructionDatabase(std::string directory_name, + std::string file_name, + bool gen_missing_info, MdlSpec &spec) + : directory_name_(directory_name), file_name_(file_name), + gen_missing_info_(gen_missing_info), spec_(spec) { + // Add all the target instructions to the instruction database. 
+ for (auto *instr : spec.instructions()) + if (!instr->subunits()->empty()) + GenerateInstructionInfo(instr); +} + +//---------------------------------------------------------------------------- +// Write out the entire database to the output C++ file. +//---------------------------------------------------------------------------- +void InstructionDatabase::Write(bool generate_llvm_defs) { + OutputState output(this, generate_llvm_defs); + output.WriteHeader(); + output.WriteLLVMDefinitions(); + output.WriteResourceDefinitions(); + output.WriteCpuTable(); + output.WriteExterns(); + output.WriteTrailer(); +} + +} // namespace mdl +} // namespace mpact diff --git a/llvm/utils/MdlCompiler/mdl_main.cpp b/llvm/utils/MdlCompiler/mdl_main.cpp new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/mdl_main.cpp @@ -0,0 +1,233 @@ +//===- mdl_main.cpp - Top level program for MDL compiler ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Architecture Machine Description Compiler. +// Read in a machine description for an architecture, parse it, do +// semantic error checking, build instruction database, and write it out. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +#include "mdl_generate.h" +#include "mdl_visitor.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" + +using namespace llvm; + +//------------------------------------------------------------------------- +// Command line flags. 
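+//
+// A typical invocation looks like (the input file name is illustrative):
+//   mdl --output_dir=<dir> --import_dir=<dir> [--dump_instr] MyTarget.mdl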
+//-------------------------------------------------------------------------
+cl::opt<std::string> input_file(cl::Positional, cl::desc(""));
+
+cl::OptionCategory MdlOutput("Output options");
+cl::opt<std::string> output_dir("output_dir", cl::desc("Output directory"),
+                                cl::init(""), cl::value_desc("dir"),
+                                cl::cat(MdlOutput));
+cl::opt<std::string> import_dir("import_dir", cl::desc("Import directory"),
+                                cl::init(""), cl::value_desc("dir"),
+                                cl::cat(MdlOutput));
+cl::opt<bool>
+    gen_missing_info("gen_missing_info",
+                     cl::desc("Generate missing info for instructions"),
+                     cl::cat(MdlOutput), cl::init(false));
+
+cl::OptionCategory MdlDiags("Diagnostic options");
+cl::opt<bool> warnings("warnings", cl::desc("Print warnings"),
+                       cl::cat(MdlDiags), cl::init(false));
+cl::opt<bool> fatal_warnings("fatal_warnings",
+                             cl::desc("Treat warnings as errors"),
+                             cl::cat(MdlDiags), cl::init(false));
+cl::opt<bool>
+    check_usage("check_usage",
+                cl::desc("Check subunit, reference, and resource usage"),
+                cl::cat(MdlDiags), cl::init(false));
+cl::opt<bool> check_all_operands(
+    "check_all_operands",
+    cl::desc("Check references to all operands - not just registers"),
+    cl::cat(MdlDiags), cl::init(false));
+
+cl::OptionCategory MdlDebug("Debugging options");
+cl::opt<bool> dump_resources("dump_resources", cl::desc("Dump resource ids"),
+                             cl::init(false), cl::cat(MdlDebug));
+cl::opt<bool> dump_fus("dump_fus",
+                       cl::desc("Dump functional unit instantiations"),
+                       cl::init(false), cl::cat(MdlDebug));
+cl::opt<bool> dump_sus("dump_sus", cl::desc("Dump subunit instantiations"),
+                       cl::init(false), cl::cat(MdlDebug));
+cl::opt<bool> dump_spec("dump_spec", cl::desc("Dump entire mdl specification"),
+                        cl::init(false), cl::cat(MdlDebug));
+cl::opt<bool> dump_instr("dump_instr", cl::desc("Dump instruction information"),
+                         cl::init(false), cl::cat(MdlDebug));
+cl::opt<bool> dump_preds("dump_preds", cl::desc("Dump user-defined predicates"),
+                         cl::init(false), cl::cat(MdlDebug));
+cl::opt<bool> generate_llvm_defs("gen_llvm_defs",
+                                 cl::desc("Generate LLVM definitions"),
+                                 cl::init(false), cl::cat(MdlDebug));
+
+//-------------------------------------------------------------------------
+// Process command lines and do some cursory error checking.
+//-------------------------------------------------------------------------
+static void usage(int argc, char **argv) {
+  if (argc < 2) {
+    llvm::errs() << "Usage: mdl [flags] \n"
+                    "       --help: print program options\n";
+    exit(EXIT_FAILURE);
+  }
+
+  // If the user specifies check_all_operands, do some other checking too.
+  if (check_all_operands)
+    check_usage = true;
+
+  // Disable some flags we don't particularly want to see.
+  cl::getRegisteredOptions()["help-list"]->setHiddenFlag(cl::ReallyHidden);
+  cl::getRegisteredOptions()["version"]->setHiddenFlag(cl::Hidden);
+  cl::getRegisteredOptions()["color"]->setHiddenFlag(cl::ReallyHidden);
+  cl::ParseCommandLineOptions(argc, argv, "MDL Compiler");
+
+  if (input_file.empty()) {
+    llvm::errs() << "Error: no input file\n";
+    exit(EXIT_FAILURE);
+  }
+}
+
+//-------------------------------------------------------------------------
+// Parse the input machine description, error check it, build a database
+// of all instruction information, and write it out to a C++ file.
+//-------------------------------------------------------------------------
+int main(int argc, char **argv) {
+  // Process command line options.
+  usage(argc, argv);
+
+  // Create object which collects all the information from the input files.
+ mpact::mdl::MdlSpec spec(warnings, fatal_warnings); + + //-------------------------------------------------------------------------- + // First Pass: Parse the input file, and build a representation of the + // entire machine description. Abort if syntax errors found. + //-------------------------------------------------------------------------- + mpact::mdl::MdlVisitor visitor(spec, import_dir); + if (!visitor.ProcessInputFile(input_file)) + mpact::mdl::Abort(); + + // Second Pass: Perform semantic checking on the specification, and clean + // up the representation so that later passes don't have to look things up. + //-------------------------------------------------------------------------- + // Build dictionaries for functional unit, subunit, and latency templates. + spec.BuildDictionaries(); + // Create templates for implicitly defined functional units. + spec.FindImplicitFuncUnitTemplates(); + // Check for duplicate definitions, and for valid pipe phase references. + spec.CheckForDuplicateDefs(); + // Check resource definitions for correctness. + spec.CheckResourceDefs(); + // Check subunit references in instructions. + spec.CheckPipeReferences(); + // Add globally defined resources to each defined CPU. + spec.PromoteGlobalResources(); + // Promote resource group members to regular resource definitions. + spec.PromoteResourceGroups(); + // Check operand references in instructions, operands, and latencies. + spec.CheckInstructions(); + spec.CheckOperands(); + + // Make sure all instruction have subunits. If they don't, add a default. + if (gen_missing_info) + spec.CheckInstructionSubunits(); + + // Check that base templates exist and have compatible parameters. + // Explicitly link templates (fu, su, latency) to their bases. + spec.CheckTemplateBases(); + // Check that each instantiation refers to a valid template, and they have + // compatible parameters/arguments. + spec.CheckInstantiations(); + // Check references in latency templates for correctness. + spec.CheckReferences(); + // Determine if we need to explicitly manage issue slots. + spec.CheckIssueSlots(); + // Scan predicate table and do logical simplification on predicates. + spec.SimplifyPredicates(); + + // If we've seen any semantic errors, abort. + if (spec.ErrorsSeen()) + mpact::mdl::Abort(); + + // Scan latencies for functional unit specifiers. For each specifier + // add implicit subunit instances to any CPUs which instantiate the FU. + spec.TieSubUnitsToFunctionalUnits(); + + // A derived subunit should be added to any instruction which is tied to + // any of the subunit's base subunits. + spec.TieDerivedSubUnitsToInstructions(); + + // Check that the input spec has some basic required components. + spec.CheckInputStructure(); + + //-------------------------------------------------------------------------- + // Third Pass: Build the internal representation of the processor database. + // This process has several steps: + //-------------------------------------------------------------------------- + // For each CPU definition, perform the instantiation of each functional + // unit, which recursively expands subunits and latency instances. + spec.InstantiateFunctionalUnits(); + + // For each CPU, build a dictionary of instances for each used functional + // unit template. + spec.BuildFuncUnitInstancesMap(); + + // For each instruction, create instruction behaviors for each processor + // and functional unit that it can run on. 
+ mpact::mdl::InstructionDatabase instruction_info(output_dir, input_file, + gen_missing_info, spec); + // Assign ids to every defined resource. + spec.AssignResourceIds(); + // Assign pool ids to each pooled resource. + spec.AssignPoolIds(); + + //-------------------------------------------------------------------------- + // Fourth Pass: do consistency checking, dump requested debug information. + //-------------------------------------------------------------------------- + if (check_usage) { + // Check for operands that never match a reference. + instruction_info.CheckUnreferencedOperands(check_all_operands); + // Check for latency referenced that never match instructions. + spec.CheckReferenceUse(); + // Also check for subunits that are never instantiated. + spec.CheckSubunitUse(); + // Look for unreferenced resources. + spec.CheckResourceUse(); + } + + // If we encountered any errors during database generation, abort. + if (spec.ErrorsSeen()) + mpact::mdl::Abort(); + + // Debug stuff - write out what we know about the machine. + if (dump_resources) + spec.DumpResourceIds(); + if (dump_fus) + spec.DumpFuncUnitInstantiations(); + if (dump_sus) + spec.DumpSubUnitInstantiations(); + if (dump_spec) + std::cout << spec.ToString(); + if (dump_instr) + instruction_info.DumpInstructions(); + if (dump_preds) + spec.DumpPredicates(); + + //-------------------------------------------------------------------------- + // Output Pass: Generate the output files. + //-------------------------------------------------------------------------- + instruction_info.Write(generate_llvm_defs); + return EXIT_SUCCESS; +} diff --git a/llvm/utils/MdlCompiler/mdl_output.h b/llvm/utils/MdlCompiler/mdl_output.h new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/mdl_output.h @@ -0,0 +1,247 @@ +//===- mdl_output.h - Definitions for writing out an MDL database ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions used to manage the creation of the C++ output +// database, and in particular automatically avoid generating duplicate +// information. +// +//===----------------------------------------------------------------------===// + +#ifndef MDL_COMPILER_MDL_OUTPUT_H_ +#define MDL_COMPILER_MDL_OUTPUT_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mdl.h" +#include "mdl_generate.h" + +namespace mpact { +namespace mdl { + +using OutputSet = std::map; + +class OutputState { +public: + using FormatName = std::string (*)(int); + + // Function to add entries to OutputSet tables. The purpose of these tables + // is to avoid duplication in generated output. When an output string is + // added to a table, if it already exists return its unique index in the + // table. If it's new, add it and allocate it a new id. + static int AddEntry(OutputSet &table, std::string &entry) { + return table.emplace(entry, table.size()).first->second; + } + + explicit OutputState(InstructionDatabase *database, bool generate_llvm_defs) + : generate_llvm_defs_(generate_llvm_defs), database_(database) { + OpenOutputFiles(); + } + ~OutputState() { + output_c().close(); + output_t().close(); + output_h().close(); + } + + // Top level function to handle output of the database. 
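+  // InstructionDatabase::Write() (mdl_generate.cpp) invokes these in order:
+  // WriteHeader, WriteLLVMDefinitions, WriteResourceDefinitions,
+  // WriteCpuTable, WriteExterns, and finally WriteTrailer.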
+ void WriteCpuTable(); + void WriteExterns(); + void WriteHeader(); + void WriteTrailer(); + void WriteSpecDefinitions(); + void WriteLLVMDefinitions(); + void WriteResourceDefinitions(); + + MdlSpec &spec() const { return database_->spec(); } + + // Format a predicate function and add to the reference predicates table. + int FormatPredicate(PredExpr *pred); + // Format a predicate for the output file. + std::string FormatPredicateFunc(PredExpr *expr); + + OutputSet &reference_predicates() { return reference_predicates_; } + OutputSet &virtual_ref_predicates() { return virtual_ref_predicates_; } + bool generate_llvm_defs() const { return generate_llvm_defs_; } + +private: + // Open the output files, abort if it cannot be opened. + void OpenOutputFiles(); + + // Format a function body to generate non-trivial phases. + std::string FormatPhaseExpr(const PhaseExpr *expr) const; + // Format a pipeline phase reference. + std::string FormatPhase(const PhaseExpr *expr); + + // Format a conditional reference for a single operand. + std::string FormatSingleConditionalOperand(ConditionalRef *cond); + // Format a non-trivial conditional operand ref. + std::string FormatConditionalOperandRef(ConditionalRef *cond); + // Format a conditional operand else clause. + std::string FormatIfElseOperandRef(ConditionalRef *cond); + // Format an operand reference. + std::string FormatOperandReference(const Reference *ref); + // Format an operand reference list, which can be share between subunits. + std::string FormatOperandReferenceList(const ReferenceList *refs); + // Format a single resource reference. + std::string FormatResourceReference(const ResourceEvent &ref); + // Format an operand reference list, which can be shared between subunits. + std::string FormatResourceReferences(InstrInfo *info, RefType type, + OutputSet &output_list, FormatName name); + // Format a conditional resource reference. + std::string FormatIfElseResourceRef(SubUnitInstantiation *subunit, + ConditionalRef *ref, RefType type, + OutputSet &output_list, FormatName name, + bool format_pooled_refs); + std::string FormatResourceReferenceList(SubUnitInstantiation *subunit, + ReferenceList &list, RefType type, + OutputSet &output_list, + FormatName name, + bool format_pooled_refs); + // Format an explicit functional unit reference. + std::string FormatFuncUnitReference(SubUnitInstantiation *subunit, + Reference *ref, bool format_pooled_ref); + + // Format a table of resource ids for a group. + std::string FormatResourceGroup(const ResourceEvent &ref); + // Format a reference to a pooled count, which may include a function. + std::string FormatPooledCount(const ResourceEvent &ref); + // Format a reference to a pool values function. + std::string FormatPoolValues(const ResourceEvent &ref); + // Format a single pool descriptor. + std::string FormatPoolDescriptor(const ResourceEvent &ref); + // Format a single pooled resource reference. + std::string FormatPooledResourceReference(const ResourceEvent &ref); + // Format a pooled reference list, which can be share between subunits. + std::string FormatPooledResourceReferences(InstrInfo *info, + OutputSet &output_list, + FormatName name); + // Format a single constraint. Return an empty string if no constraint found. + std::string FormatConstraint(const Reference *ref); + // Find and format a list of constraints. Not all operands have constraints, + // so the resulting string could be empty. + std::string FormatConstraintList(ReferenceList *refs); + // Format a single conditional constraint. 
+ std::string FormatIfElseConstraint(ConditionalRef *cond); + // Format all the conditional and unconditional constraints for an instr. + std::string FormatPortReferences(InstrInfo *info); + // Format a single subunit. These are also shared between instructions. + std::string FormatSubunit(InstrInfo *info); + // Format a subunit set for an instruction on a single CPU. + std::string FormatSubunits(const std::string &instr, + const InstrInfoList &info_list, + const std::string &cpuname); + + // Methods for writing out parts of the machine description. + void WriteTable(const OutputSet &objects, const std::string &type, + const std::string &suffix, FormatName name, + const std::string &title, const std::string &info = ""); + void WriteVectorTable(const OutputSet &objects, const std::string &type, + FormatName name, const std::string &title, + const std::string &info = ""); + void WritePhases(const OutputSet &phases, FormatName name) const; + void WritePoolCountFunctions(const OutputSet &funcs, FormatName name) const; + void WritePoolValueFunctions(const OutputSet &funcs, FormatName name) const; + void WritePredicateFunctions(const OutputSet &funcs, FormatName name, + const std::string &type, + std::fstream &output) const; + void WriteVirtualPredicateTable(const OutputSet &funcs) const; + void WriteClasses(const OutputSet ®_classes, FormatName name); + void WriteInstructionInfo() const; + void WriteInstructionTables() const; + + // Methods for generating and writing out forwarding networks. + class FwdNetwork { + public: + explicit FwdNetwork(int units) : units_(units) { + matrix_ = new int8_t *[units]; + for (int i = 0; i < units; i++) + matrix_[i] = new int8_t[units](); + } + ~FwdNetwork() { + for (unsigned i = 0; i < units_; i++) + delete matrix_[i]; + delete matrix_; + } + void set(int from, int to, int weight) { matrix_[from][to] = weight; } + int get(int from, int to) const { return matrix_[from][to]; } + int units() const { return units_; } + + private: + int8_t **matrix_; + unsigned units_; // number of functional units modeled + }; + + void ExpandForwardStmt(FwdNetwork &network, const CpuInstance *cpu, + const ClusterInstance *cluster, + const ForwardStmt *fwd) const; + std::vector FindUnitIds(const CpuInstance *cpu, + const ClusterInstance *cluster, + const Identifier *name) const; + std::string FormatForwardingInfo(const CpuInstance *cpu, FwdNetwork &network); + void GenerateForwardingInfo(); + + void WriteCpuList() const; + + std::fstream &output_c() const { return *output_c_; } + std::fstream &output_t() const { return *output_t_; } + std::fstream &output_h() const { return *output_h_; } + + bool generate_llvm_defs_; // generate defs for stand-alone tools + std::set forward_phases_; // funcs which need forward decls + std::set forward_opnd_refs_; // OperandRef forward refs + std::set forward_resource_refs_; // conditional resource refs + std::set forward_pooled_refs_; // conditional pooled refs + std::set forward_constraint_refs_; // conditional constraints + + std::set forward_cond_opnd_refs_; + std::set forward_cond_res_refs_; + std::set forward_cond_pool_refs_; + std::set forward_cond_constraint_refs_; + + OutputSet phases_; // non-trivial pipeline phases + OutputSet register_classes_; // register classes + OutputSet operand_refs_; // operand reference lists + OutputSet cond_operand_refs_; // conditional operand references + OutputSet cond_resource_refs_; // conditional resource references + OutputSet cond_pooled_resource_refs_; // conditional resource references + OutputSet 
cond_constraints_; // conditional constraint references + OutputSet used_resource_refs_; // used resource reference lists + OutputSet held_resource_refs_; // held resource reference lists + OutputSet reserved_resource_refs_; // reserved resource reference lists + OutputSet resource_groups_; // all unique resource groups + OutputSet pool_descriptors_; // all unique pools/subpools + OutputSet pooled_resource_refs_; // pooled resource references + OutputSet pooled_count_functions_; // pooled count functions + OutputSet pool_mask_functions_; // pool mask functions + OutputSet constraints_; // constraint sets + OutputSet subunits_; // subunit lists + OutputSet cpu_instr_subunits_; // cpu/instruction mapping to subunits + OutputSet reference_predicates_; // reference predicate functions + OutputSet virtual_ref_predicates_; // virtualized reference predicates + OutputSet forward_sets_; // a set of forwarding edge weights + + std::string file_name_c_; // name of the database output file + std::string file_name_t_; // name of the target library output file + std::string file_name_h_; // name of the generated header output file + // + std::fstream *output_c_; // the database output file + std::fstream *output_t_; // the target library database output file + std::fstream *output_h_; // the generated header file stream + InstructionDatabase *database_; // the thing we're writing out +}; + +} // namespace mdl +} // namespace mpact + +#endif // MDL_COMPILER_MDL_OUTPUT_H_ diff --git a/llvm/utils/MdlCompiler/mdl_output.cpp b/llvm/utils/MdlCompiler/mdl_output.cpp new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/mdl_output.cpp @@ -0,0 +1,2167 @@ +//===- mdl_output.cpp - Write out the MDL database ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Produce the instruction information database for LLVM as a C++ file. +// +// The overall schema for the database looks like this: +// +// +// +// +// +// +// +// +// > > > > +// +// A key aspect of the design is that there is -enormous- duplication of +// information across CPUs, Functional Units, and Instructions, and we +// want to share this information across the database as much as possible. +// The formatting functions automatically share output objects. +// +// The organization of the generated C++ output looks like this: +// +// +// +// +// +// +// +// +// +// +// +// +// +// For each table of shared objects, we create a dictionary of the output +// representation of each entry in the table. Once an entry is in the table +// it is referred to by its unique identifier in the table. +// +// The majority of the code in this file simply handles the formatting of +// the object to generate C++ code. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include + +#include "llvm/Support/Error.h" + +#include "mdl.h" +#include "mdl_output.h" + +namespace mpact { +namespace mdl { + +// We generate a LOT of nullptrs in the output, and would prefer something +// that adds less clutter. So we use __ rather than "nullptr". 
+constexpr auto kNull = "__"; + +static const char *divider = + "\n//-------------------------------------------------------------------\n"; + +// Functions for creating shared object names. +static std::string TableName(int index, std::string prefix) { + if (index == -1) + return kNull; + return prefix + std::to_string(index); +} +static std::string PredicateName(int index) { + return TableName(index, "PRED_"); +} +static std::string VirtualPredicateName(int index) { + return TableName(index, "MI_PRED_"); +} +static std::string OperandListName(int index) { + return TableName(index, "OPND_"); +} +static std::string CondReferenceName(int index) { + return TableName(index, "COND_"); +} +static std::string CondResourceReferenceName(int index) { + return TableName(index, "CRES_"); +} +static std::string CondPooledResourceReferenceName(int index) { + return TableName(index, "CPOOL_"); +} +static std::string CondConstraintName(int index) { + return TableName(index, "CREG_"); +} +static std::string UsedResourceListName(int index) { + return TableName(index, "URES_"); +} +static std::string HeldResourceListName(int index) { + return TableName(index, "HRES_"); +} +static std::string ReservedResourceListName(int index) { + return TableName(index, "RRES_"); +} +static std::string ResourceGroupName(int index) { + return TableName(index, "GROUP_"); +} +static std::string PoolDescriptorName(int index) { + return TableName(index, "POOL_"); +} +static std::string PooledResourceListName(int index) { + return TableName(index, "PRES_"); +} +static std::string PooledCountFuncName(int index) { + return TableName(index, "COUNT_"); +} +static std::string PoolValueFuncName(int index) { + return TableName(index, "VALUE_"); +} +static std::string ConstraintListName(int index) { + return TableName(index, "REG_"); +} +static std::string PhaseName(int index) { return TableName(index, "PIPE_"); } +static std::string SubunitListName(int index) { + return TableName(index, "SU_"); +} +static std::string SubunitsName(const std::string &cpu, + const std::string &name) { + return formatv("SU__{0}__{1}", cpu, name); +} +static std::string ForwardSetName(int index) { + return TableName(index, "FWD_"); +} + +// For non-trivial phase expressions, create a C++ expression to evaluate +// the arithmetic and fetch operands from the instruction if needed. +std::string OutputState::FormatPhaseExpr(const PhaseExpr *expr) const { + std::string left = expr->left() ? FormatPhaseExpr(expr->left()) : ""; + std::string right = expr->right() ? FormatPhaseExpr(expr->right()) : ""; + + switch (expr->operation()) { + case kPlus: + return formatv("({0} + {1})", left, right); + case kMinus: + return formatv("({0} - {1})", left, right); + case kMult: + return formatv("({0} * {1})", left, right); + case kDiv: + if (expr->right()->IsExpressionConstant()) + return formatv("({0} / {1})", left, right); + else + return formatv("({0} / ({1} ?: 1))", left, right); + case kNeg: + return formatv("-({0})", left); + case kInt: + return std::to_string(expr->number()); + case kPhase: + return std::to_string(expr->phase_id()); + case kOpnd: + return formatv("static_cast(ins->GetOperand({0}))", + expr->operand()->operand_index()); + case kPositive: + return formatv("std::max(0, {0})", left); + } + return "Error"; +} + +// Format a pipeline phase reference. A phase can either evaluate to an +// integer, or the address of a function that calculates the phase. 
+// If a function is required, generate the body of that function (it's just a +// return statement), and enter it into a table, and return the function name. +std::string OutputState::FormatPhase(const PhaseExpr *expr) { + if (expr->IsExpressionConstant()) + return std::to_string(expr->EvaluateConstantExpression()) + "," + kNull; + + std::string out = formatv(" return {0};", FormatPhaseExpr(expr)); + auto index = AddEntry(phases_, out); + return formatv("-1,&{0}", PhaseName(index)); +} + +// Format reference flags field. +std::string FormatReferenceFlags(const Reference *ref) { + std::string out; + if (ref == nullptr || ref->phase_expr()->IsDefaultLatency()) + return "0"; + if (ref->IsProtected()) + out = std::to_string(RefFlags::kProtected); + + if (ref->IsDuplicate()) + out += formatv(out.empty() ? "{0}" : "|{0}", + std::to_string(RefFlags::kDuplicate)); + + if (out.empty()) + return "0"; + return out; +} + +// Format resource reference flags field. +std::string FormatResourceReferenceFlags(const ResourceEvent &ref) { + std::string out; + // Handle operand and resource references. + if (ref.ref_type() != RefTypes::kFus) { + if (ref.phase_expr()->IsProtected()) + out = std::to_string(RefFlags::kProtected); + if (ref.reference() && ref.reference()->IsDuplicate()) + out += formatv(out.empty() ? "{0}" : "|{0}", RefFlags::kDuplicate); + return out; + } + + // Handle explicit functional unit reference flags. + if (ref.resource()->definition()->is_unreserved()) + out += formatv(out.empty() ? "{0}" : "|{0}", RefFlags::kUnreserved); + if (ref.is_buffered()) + out += formatv(out.empty() ? "{0}" : "|{0}", RefFlags::kBuffered); + if (RefFlags::is_begin_group(ref.fu_flags())) + out += formatv(out.empty() ? "{0}" : "|{0}", RefFlags::kBeginGroup); + if (RefFlags::is_end_group(ref.fu_flags())) + out += formatv(out.empty() ? "{0}" : "|{0}", RefFlags::kEndGroup); + if (RefFlags::is_single_issue(ref.fu_flags())) + out += formatv(out.empty() ? "{0}" : "|{0}", RefFlags::kSingleIssue); + if (RefFlags::is_retire_ooo(ref.fu_flags())) + out += formatv(out.empty() ? "{0}" : "|{0}", RefFlags::kRetireOOO); + return out.empty() ? "0" : out; +} + +// Format a predicate function, add it to the table, and return its index. +int OutputState::FormatPredicate(PredExpr *pred) { + if (pred == nullptr) + return -1; + auto func = formatv("return {0};", FormatPredicateFunc(pred)); + + // If we're generating a standalone database, check to see if the function + // includes LLVM definitions for the target. This is conservative, but safe. + if (generate_llvm_defs_) { + if (func.find(database_->spec().family_name()) != std::string::npos || + func.find("evaluatePredicate") != std::string::npos) + func = "return true;"; + } + + return AddEntry(reference_predicates_, func); +} + +// This function is used to generate a phase function for an if/then/else +// tree where its been determined (in IsSingleOperand) that all clauses of +// the conditional reference access the same operand with various latencies. +// We handle 2 special cases: +// - All the latencies are exactly the same integer value, so just return it. +// - There was only one phase function, so just return that function. +// Its possible to see duplicate predicates in different clauses. and there's +// no reason to write out a predicate more than once. 
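+// As a purely illustrative sketch (table indices invented), the returned
+// string is a "<phase>,<function>" pair:
+//   - every clause returns the constant 4     ->  "4,__"
+//   - one shared non-trivial phase function   ->  "-1,&PIPE_7"
+//   - otherwise the accumulated "if (PRED_k(ins)) ..." clauses are entered
+//     into the phase table as a new function, and we return "-1,&PIPE_n".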
+std::string OutputState::FormatSingleConditionalOperand(ConditionalRef *cond) { + std::string out; + std::set constant_exprs; + std::set nontrivial_exprs; + std::set seen_predicates; + + for (; cond; cond = cond->else_clause()) { + auto *pred = cond->instr_predicate(); + if (pred != nullptr && !pred->IsTrue()) { + auto index = FormatPredicate(pred); + if (seen_predicates.count(index)) + continue; // Skip duplicate predicates + seen_predicates.insert(index); + out += formatv(" if ({0}(ins))", PredicateName(index)); + } + + PhaseExpr *expr = cond->refs()[0]->phase_expr(); + if (expr->IsExpressionConstant()) { + int value = expr->EvaluateConstantExpression(); + out += formatv(" return {0};\n", value); + constant_exprs.insert(value); + } else { + std::string func = formatv(" return {0};", FormatPhaseExpr(expr)); + int index = AddEntry(phases_, func); + out += PhaseName(index) + ";\n"; + nontrivial_exprs.insert(index); + forward_phases_.insert(index); + } + + if (pred == nullptr || pred->IsTrue()) + break; + } + + // If we only saw one unique value generated, just return that value. + if (constant_exprs.size() + nontrivial_exprs.size() == 1) { + if (!constant_exprs.empty()) + return std::to_string(*constant_exprs.begin()) + "," + kNull; + return formatv("-1,&{0}", PhaseName(*nontrivial_exprs.begin())); + } + + // Otherwise insert the new function into the table and return it. + auto index = AddEntry(phases_, out); + return formatv("-1,&{0}", PhaseName(index)); +} + +// Generate a predicated operand reference. This is the top level function for +// handling if/then/else references. This returns a constructor for an +// operand reference, which is either a "normal" constructor if the predicates +// can be folded into phase functions, or a conditional operand reference for +// non-trivial if/then/else references. +std::string OutputState::FormatConditionalOperandRef(ConditionalRef *cond) { + // If this set of conditionals only references a single operand, we can + // fold all the predicates into a phase function. + if (cond->IsSingleOperand()) { + auto *ref = cond->refs()[0]; + int index = ref->operand() ? ref->operand()->operand_index() : -1; + return formatv("{{{0},{1},{2},{3}}", FormatReferenceType(ref->ref_type()), + FormatReferenceFlags(ref), + FormatSingleConditionalOperand(cond), index); + } + + // If it is a set of if/then/else clauses that we can't simplify, generate + // a conditional operand reference record. + return formatv("{{{0}}", FormatIfElseOperandRef(cond)); +} + +// Else clauses are handled by creating a single OperandRef which is initialized +// with an optional predicate, a pointer to an operand reference list, and an +// optional else clause. Empty else clauses simply return "nullptr". +std::string OutputState::FormatIfElseOperandRef(ConditionalRef *cond) { + std::string out; + if (cond == nullptr) + return kNull; + + // Add the operand list id to the list of OperandRef forward references. + auto opnds = FormatOperandReferenceList(&cond->refs()); + if (opnds != kNull) + forward_opnd_refs_.insert(opnds.substr(1, opnds.size() - 1)); + + // If the predicate is null and the operand list is empty, just return null. + if (opnds == kNull && cond->instr_predicate() == nullptr) + return kNull; + + // If the predicate is null it's an unconditional set of references. 
+ if (cond->instr_predicate() == nullptr) { + out = formatv("{1},{0},{1}", opnds, kNull); + } else { + auto index = FormatPredicate(cond->instr_predicate()); + auto else_refs = FormatIfElseOperandRef(cond->else_clause()); + out = formatv("&{0},{1},{2}", PredicateName(index), opnds, else_refs); + // Add else clauses to ConditionalRefs forward references. + if (else_refs != kNull) + forward_cond_opnd_refs_.insert(else_refs.substr(1, else_refs.size() - 1)); + } + + auto index = AddEntry(cond_operand_refs_, out); + return formatv("&{0}", CondReferenceName(index)); +} + +// Format an operand reference. We generate an autoinitialization for the type: +// struct OperandRef { +// ReferenceType type; // type of the reference (use, def, ...) +// ReferenceFlags flags; // reference flags +// PipePhase phase; // pipeline phase of the reference +// PipeFunc phase_func; // optional pointer to phase function +// OperandId operand_index; // operand index +// }; +std::string OutputState::FormatOperandReference(const Reference *ref) { + int index = ref->operand() ? ref->operand()->operand_index() : -1; + return formatv("{{{0},{1},{2},{3}}", FormatReferenceType(ref->ref_type()), + FormatReferenceFlags(ref), FormatPhase(ref->phase_expr()), + index); +} + +// Format an operand reference list. Create a vector of operand references, +// and enter into a table so it can be shared between subunits. +std::string OutputState::FormatOperandReferenceList(const ReferenceList *refs) { + std::string out; + std::string previous; + for (const auto *ref : *refs) { + if (ref->IsConditionalRef()) { + if (ref->conditional_ref()->HasOperandRefs()) { + out += FormatConditionalOperandRef(ref->conditional_ref()) + ","; + } + } else if (ref->operand() && ref->IsOperandRefType()) { + auto current = FormatOperandReference(ref); + if (current != previous) { + out += current + ","; + previous = current; + } + } + } + if (out.empty()) + return kNull; + out.pop_back(); // throw away trailing comma. + + auto index = AddEntry(operand_refs_, out); + return formatv("&{0}", OperandListName(index)); +} + +// Format a single resource reference. We generate an autoinitialization of +// the type: +// struct ResourceRef { +// ReferenceType type; // type of the reference (use, def, ...) +// ReferenceFlags flags; // reference flags +// PipePhase phase; // pipeline phase of the reference +// PipeFunc phase_func; // optional pointer to phase function +// unsigned int use_cycles; // # cycles a resource is used +// ResourceId resource id; // the resource we're referencing +// OperandId operand_index; // operand index for shared resources +// PoolBits width; // number of bits in shared value (or -1) +// }; + +std::string OutputState::FormatResourceReference(const ResourceEvent &ref) { + auto *res = ref.resource(); + auto type = FormatReferenceType(ref.ref_type()); + auto flags = FormatResourceReferenceFlags(ref); + auto phase = FormatPhase(ref.phase_expr()); + int opnd = res->operand_index(); + int cycles = ref.use_cycles(); + int size = res->definition()->bit_size(); + std::string out; + + // If this reference is a duplicate and it doesn't have a valid operand id, + // there's no reason to write it out. + if (ref.reference() && ref.reference()->IsDuplicate() && + !res->has_operand_index()) + return ""; + + // If this was a functional unit reference, write out a FU constructor. 
+ if (ref.IsFuncUnitRef()) + return formatv("{{{0},{1},{2},{3},{4}}", type, flags, cycles, + res->get_final_resource_id(), ref.micro_ops()); + + // If this was a reference to an entire group, write out a reference for + // each group member. + if (res->IsGroupRef()) { + for (auto *member : res->definition()->member_defs()) { + out += formatv("{{{0},{1},{2},{3},{4},{5},{6}},", type, flags, phase, + cycles, member->get_resource_id(), opnd, size); + } + out.pop_back(); // throw away trailing comma. + return out; + } + + // If this is a reference to a single resource, write out a single reference. + if (!res->IsGroupRef() || res->first() == res->last()) + return formatv("{{{0},{1},{2},{3},{4},{5},{6}}", type, flags, phase, cycles, + res->get_final_resource_id(), opnd, size); + + // If the reference was for a range of pool entries, write out a reference + // for each resource in the range. (This is rare.) + if (res->IsArrayDef()) { + for (int id = res->first(); id <= res->last(); id++) { + out += formatv("{{{0},{1},{2},{3},{4},{5},{6}},", type, flags, phase, + cycles, res->get_resource_id() + id, opnd, size); + } + } + out.pop_back(); // throw away trailing comma. + return out; +} + +// Given a list of possibly predicated references, generate the objects for +// each reference. This can handle pooled or not-pooled reference lists. +std::string OutputState::FormatResourceReferenceList( + SubUnitInstantiation *subunit, ReferenceList &refs, RefType type, + OutputSet &output_list, FormatName name, bool format_pooled_refs) { + std::string out; + auto *cpu = subunit->cpu(); + + for (auto *ref : refs) { + if (ref->IsFuncUnitRef()) { + if (type == RefTypes::kUse) // if we're filtering for kUses. + out += FormatFuncUnitReference(subunit, ref, format_pooled_refs); + continue; + } + + if (ref->IsConditionalRef()) { + auto res = FormatIfElseResourceRef(subunit, ref->conditional_ref(), type, + output_list, name, format_pooled_refs); + if (res != kNull) + out += "{&" + res + "},"; + continue; + } + + // Handle normal case of a reference that may contain resource refs. + auto ref_type = ref->AdjustResourceReferenceType(); + if (ref_type != type) + continue; + for (auto *res : *ref->resources()) { + if (!res->IsNull() && (res->HasCount() == format_pooled_refs)) { + PhaseExpr *phase = ref->phase_expr(); + if (auto *start = res->definition()->start_phase()) + phase = new PhaseExpr(subunit->spec()->FindPipeReference(start, cpu)); + if (ref->operand()) + res->set_operand_index(ref->operand()->operand_index()); + auto event = ResourceEvent(ref_type, phase, ref->use_cycles(), res, ref, + subunit); + if (!res->HasCount()) + out += FormatResourceReference(event) + ","; + else + out += FormatPooledResourceReference(event) + ","; + } + } + } + + if (out.empty()) + return kNull; + out.pop_back(); // throw away trailing comma. + + // Enter it into a table, and return the name of the table entry. + return name(AddEntry(output_list, out)); +} + +// Format an explicit functional unit reference. The resources in Fus +// records refer to the name of either a functional unit template or a +// functional unit group. If its a functional unit template, we find each +// occurance of that template in the current CPU - if there is more than +// one we generate a pooled reference, otherwise we generate a single +// reference of the instantiated FU. If its a functional unit group, we +// find all occurances of all members of the group, and generate references +// for each individual instantiation. 
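+// As a hypothetical illustration (unit names invented): if a CPU has a
+// single instance of the functional unit template "ALU", a use of "ALU"
+// becomes one functional-unit ResourceRef for that instance; if it has
+// instances "alu0" and "alu1", the use becomes a pooled reference whose
+// resource group contains both instances, leaving the choice of instance
+// to pool allocation. A group such as "ALL_ALU" likewise expands to a pool
+// over every instance of every member template.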
+std::string OutputState::FormatFuncUnitReference(SubUnitInstantiation *subunit, + Reference *ref, + bool format_pooled_refs) { + std::string out; + auto *cpu = subunit->cpu(); + + // If the resource list is empty, this is a plain micro-ops statement. + if (ref->resources()->empty() && ref->micro_ops() > 0 && !format_pooled_refs) + return formatv("{{RefFus,{0},{1}},", ref->fu_flags(), ref->micro_ops()); + + // For each named FU, create resource events and write them out. If an + // instance is a sub-functional-unit, return its root parent resource. + for (auto *res : *ref->resources()) { + if (spec().IsFuncUnitTemplate(res->name())) { + auto &funits = cpu->func_unit_instances()[res->name()]; + if (funits.size() == 0) + continue; + + // Generate an entry for a single functional unit reference. + if (!format_pooled_refs && funits.size() == 1) { + bool buffered = funits[0]->instance()->is_buffered(); + auto *fu = new ResourceRef(funits[0]->get_root_resource()); + ResourceEvent fu_res(RefTypes::kFus, ref->phase_expr(), + ref->use_cycles(), fu, ref->micro_ops(), buffered, + ref->fu_flags()); + out += FormatResourceReference(fu_res) + ","; + continue; + } + // If there are multiple instances of an FU, write out a pooled reference + // for all the instances. + if (format_pooled_refs && funits.size() > 1) { + ResourceDef pool("whatevs"); + bool buffered = false; + for (auto *unit : funits) { + auto *fu = new ResourceRef(unit->get_root_resource()); + ResourceEvent fu_member( + RefTypes::kFus, ref->phase_expr(), ref->use_cycles(), fu, 0, + unit->instance()->is_buffered(), ref->fu_flags()); + buffered |= unit->instance()->is_buffered(); + pool.add_member_def(unit->get_root_resource()); + pool.members().push_back(unit->get_root_resource()->id()); + } + auto *group_ref = new ResourceRef(&pool); + group_ref->set_pool_count(1); + ResourceEvent fu_res(RefTypes::kFus, ref->phase_expr(), + ref->use_cycles(), group_ref, ref->micro_ops(), + buffered, ref->fu_flags()); + out += FormatPooledResourceReference(fu_res) + ","; + cpu->add_fu_pool_size(funits.size()); + } + + // For Functional Unit Groups, create a pool of all the members. + } else if (spec().IsFuncUnitGroup(res->name()) && format_pooled_refs) { + ResourceDef pool("whatevs"); + auto *group = spec().fu_group_map()[res->name()]; + bool buffered = group->buffer_size(); + for (auto *fu_template : group->fu_members()) { + auto &units = cpu->func_unit_instances()[fu_template->name()]; + for (auto *unit : units) { + pool.add_member_def(unit->get_root_resource()); + pool.members().push_back(unit->get_root_resource()->id()); + } + } + auto *group_ref = new ResourceRef(&pool); + group_ref->set_pool_count(1); + ResourceEvent fu_res(RefTypes::kFus, ref->phase_expr(), ref->use_cycles(), + group_ref, ref->micro_ops(), buffered, + ref->fu_flags()); + out += FormatPooledResourceReference(fu_res) + ","; + cpu->add_fu_pool_size(pool.members().size()); + } +#if 0 + // For Functional Unit Groups, just write out a use of each group member. + // Note that an individual group member can have multiple instances. 
+ else if (spec().IsFuncUnitGroup(res->name()) && !format_pooled_refs) { + int micro_ops = ref->micro_ops(); + for (auto *member : *spec().fu_group_map()[res->name()]->members()) { + // Add resource event for member->resource(); + auto &funits = cpu->func_unit_instances()[member->name()]; + for (auto *unit : funits) { + bool buffered = unit->instance()->is_buffered(); + auto *fu = new ResourceRef(unit->get_root_resource()); + ResourceEvent fu_res(RefTypes::kFus, ref->phase_expr(), + ref->use_cycles(), fu, micro_ops, buffered, + ref->fu_flags()); + micro_ops = 0; // Only write out microps for the first FU. + out += FormatResourceReference(fu_res) + ","; + } + } + } +#endif + } + + return out; +} + +// Conditionally format a predicated set of resource references. The input +// reference list may or may not contain resource references, so we need to +// handle the (common) case that none are found. +std::string OutputState::FormatIfElseResourceRef( + SubUnitInstantiation *subunit, ConditionalRef *cond, RefType type, + OutputSet &output_list, FormatName name, bool format_pooled_refs) { + if (cond == nullptr) + return kNull; + + // Find resource references and generate entries for each. If any are found, + // add the resource list id to the list of ResourceRef forward references. + auto then_refs = FormatResourceReferenceList( + subunit, cond->refs(), type, output_list, name, format_pooled_refs); + if (then_refs != kNull) { + if (format_pooled_refs) + forward_pooled_refs_.insert(then_refs); + else + forward_resource_refs_.insert(then_refs); + } + + // If no resource references were found, and the predicate is null, abort. + if (then_refs == kNull && cond->instr_predicate() == nullptr) + return kNull; + + // If the predicate is null it's an unconditional set of references. + std::string out; + if (cond->instr_predicate() == nullptr) { + out = formatv("{1},&{0},{1}", then_refs, kNull); + } else { + auto else_refs = + FormatIfElseResourceRef(subunit, cond->else_clause(), type, output_list, + name, format_pooled_refs); + // Add else clauses to ConditionalRefs forward references. + if (else_refs != kNull) { + if (format_pooled_refs) + forward_cond_pool_refs_.insert(else_refs); + else + forward_cond_res_refs_.insert(else_refs); + } + + if (else_refs == kNull && then_refs == kNull) + return kNull; + if (else_refs != kNull) + else_refs = "&" + else_refs; + if (then_refs != kNull) + then_refs = "&" + then_refs; + + auto index = FormatPredicate(cond->instr_predicate()); + out = formatv("&{0},{1},{2}", PredicateName(index), then_refs, else_refs); + } + + if (format_pooled_refs) { + auto index = AddEntry(cond_pooled_resource_refs_, out); + return CondPooledResourceReferenceName(index); + } else { + auto index = AddEntry(cond_resource_refs_, out); + return CondResourceReferenceName(index); + } +} + +// Format a resource reference list. Create a vector of resource references, +// and enter into a table so it can be shared between subunits. +std::string OutputState::FormatResourceReferences(InstrInfo *info, RefType type, + OutputSet &output_list, + FormatName name) { + // First write out entries for all the unconditional resource references. 
+ std::string out, previous; + auto *subunit = info->subunit(); + for (auto &ref : info->resources()) + if (!ref.resource()->HasCount() && ref.ref_type() == type) { + auto resource = FormatResourceReference(ref); + if (!resource.empty() && resource != previous) { + out += resource + ","; + previous = resource; + } + } + + // Format conditional resource references and FU references. + for (auto *ref : info->resource_refs()) { + if (ref->IsConditionalRef()) { + auto res = FormatIfElseResourceRef(subunit, ref->conditional_ref(), type, + output_list, name, false); + if (res != kNull) + out += "{&" + res + "},"; + } else if (ref->IsFuncUnitRef() && type == RefTypes::kUse) { + out += FormatFuncUnitReference(subunit, ref, false); + } + } + + if (out.empty()) + return kNull; + out.pop_back(); // throw away trailing comma. + + // Enter it into a table, and return a reference to the table entry. + auto index = AddEntry(output_list, out); + return formatv("&{0}", name(index)); +} + +// Search for a named attribute in an operand definition. If it has an +// attribute predicate, make sure it's valid for this operand. +// Note: Don't check bases here - that is done at a higher level. +OperandAttribute *FindAttribute(const std::string &name, const OperandDef *opnd, + const SubUnitInstantiation *subunit) { + if (opnd == nullptr || opnd->attributes() == nullptr) + return nullptr; + for (auto *attr : *opnd->attributes()) + if (name == attr->name()) + if (subunit->ValidPredicate(attr->predicate())) + return attr; + return nullptr; +} + +// Find the derivation between an operand and a base operand. +// Since we checked derivations earlier, this should always succeed. +bool FindDerivation(OperandDef *ref, const OperandDef *decl, + OperandDefList &opnds) { + opnds.push_back(ref); + if (ref == decl) + return true; + if (ref->base_operands()) + for (auto *base : *ref->base_operands()) + if (FindDerivation(base, decl, opnds)) + return true; + + opnds.pop_back(); + return false; +} + +// Generate the conditional code that implements an attribute predicate. +std::string FormatAttributePredicate(const OperandAttribute *attr) { + std::string out; + for (auto *pred : *attr->predicate_values()) { + if (!out.empty()) + out += " ||\n "; + if (pred->IsValue()) + out += formatv("(value == {0})", pred->FormatValue(pred->value())); + else if (pred->IsRange()) + out += formatv("(value >= {0} && value <= {1})", + pred->FormatValue(pred->low()), + pred->FormatValue(pred->high())); + else if (pred->IsMask()) + out += formatv("((value & ~{0:X8}UL) == 0)", pred->mask()); + } + + if (attr->predicate_values()->size() > 1) + return formatv(" if ({0})", out); + return formatv(" if {0}", out); +} + +// Generate a function that returns the appropriate attribute value for the +// given operand derivation. 
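+// A hypothetical example of a generated pool-count function (attribute
+// values and predicates invented; the shape follows this function and
+// WritePoolCountFunctions below):
+//   int COUNT_2(Instr *ins, int operand_index) {
+//     if (ins->isOpndLiteral(operand_index)) {
+//       int64_t value = ins->GetOperand(operand_index);
+//       if ((value >= 0 && value <= 255)) return 1;
+//     }
+//     return 2;   // default
+//   }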
+std::string FormatPooledCountFunction(std::string const &attr, + OperandDefList const &opnds, + const SubUnitInstantiation *subunit) { + // The generated function has the following declaration: + // void COUNT_#(Instr *ins, int operand_id, int size, int values[]) {} + std::string out; + std::vector lits; + std::string addr, label, none; + bool lit_end = false; + + int64_t max_value = 0; + for (auto *opnd : opnds) { + for (auto op_attr : *opnd->attributes()) { + if (op_attr->name() == attr && + subunit->ValidPredicate(op_attr->predicate())) { + int64_t value = op_attr->values(0); + std::string result = formatv(" return {0};", value); + + std::string attr = op_attr->type(); + if (attr.empty()) + none = result + " // none\n"; + if (attr == "address" && addr.empty()) + addr = formatv(" {0} // addr\n", result); + if (attr == "label" && label.empty()) + label = formatv(" {0} // label\n", result); + + if (attr == "lit" && !lit_end) { + std::string out; + lit_end = op_attr->predicate_values()->empty(); + if (!lit_end) { + out = + formatv("{0}{1}\n", FormatAttributePredicate(op_attr), result); + lits.push_back(out); + } else { + lits.push_back(formatv(" {0}\n", result)); + } + } + max_value = std::max(max_value, value); + } + } + } + + if (!lits.empty()) { + out += " if (ins->isOpndLiteral(operand_index)) {\n" + " int64_t value = ins->GetOperand(operand_index);\n"; + for (const auto &lit : lits) + out += lit; + out += " }\n"; + } + if (!label.empty()) + out += formatv(" if (ins->isOpndLabel(operand_index))\n{0}", label); + if (!addr.empty()) + out += formatv(" if (ins->isOpndAddress(operand_index))\n{0}", addr); + if (!none.empty()) + out += none; + + if (none.empty()) + out += formatv(" return {0}; // default\n", max_value); + return out; +} + +// Return a string that encodes the pool count and the address of a function +// to call if a symbolic size was specified: . +// If the provided name matches an operand attribute, generate a function to +// calculate the right attribute value. If it does't match an attribute, +// generate the address of a user-define function to call. +std::string OutputState::FormatPooledCount(const ResourceEvent &ref) { + auto *res = ref.resource(); + int pool_count = res->pool_count(); + + // If there's no symbolic pooled count specified, just return the + // specified pool count. + if (res->pool_count_name() == nullptr) + return formatv("{0},{1}", pool_count, kNull); + + auto count_name = res->pool_count_name()->name(); + auto out = formatv("{0},&PoolCount_{1}", pool_count, count_name); + + // If there is a symbolic pooled count, but no reference or no operand, + // then just generate a reference to the user-defined function. + if (ref.reference() == nullptr || ref.reference()->operand() == nullptr) + return out; + + // If there is an operand reference, check to see if the pool count name + // matches an operand attribute with a valid predicate. + auto *opnd_ref = ref.reference()->operand(); + auto *opnd_base = opnd_ref->operand_decl()->operand(); + auto *opnd_def = opnd_ref->operand(); + + // If we can't find a derivation, there's a problem, just return. + OperandDefList opnds; + if (!FindDerivation(opnd_def, opnd_base, opnds)) { + spec().ErrorLog(ref.reference(), "Operand Derivation Panic\n\n"); + return out; + } + + // If we don't find the attribute name, generate the reference to the + // user-defined function. 
+ OperandAttribute *attr = nullptr; + for (auto *opnd : opnds) + if ((attr = FindAttribute(count_name, opnd, ref.subunit())) != nullptr) + break; + if (attr == nullptr) + return out; + + // If the attribute doesn't have predicate values associated with it, just + // return the attribute value. + if (attr->predicate_values()->empty()) + return formatv("{0},{1}", attr->values(0), kNull); + + // Finally, we can generate code for the attribute lookup, and return + // the name of the function. + auto func = FormatPooledCountFunction(count_name, opnds, ref.subunit()); + auto index = AddEntry(pooled_count_functions_, func); + return formatv("{0},&{1}", pool_count, PooledCountFuncName(index)); +} + +// Generate a function that returns the appropriate attribute value(s) for the +// given operand derivation. +const int kMaxPoolCount = 20; + +static int FindLSB(int64_t val) { + int lsb = 0; + if (val == 0) + return 0; + for (; (val & 1) == 0; val >>= 1) + lsb++; + return lsb; +} + +std::string FormatPoolValuesFunction(std::string const &attr, + ResourceEvent const &ref, + OperandDefList const &opnds, + const SubUnitInstantiation *subunit) { + // The generated function has the following declaration: + // void VALUE_#(Instr *ins, int operand_id, int size, int values[]) {} + std::string sizes[kMaxPoolCount]; + bool unconditional[kMaxPoolCount] = {false}; + + // Collect the attributes for each size found in the operands. + for (auto *opnd : opnds) + if (opnd->attributes()) { + for (auto op_attr : *opnd->attributes()) { + if (op_attr->name() == attr && + subunit->ValidPredicate(op_attr->predicate())) { + int tuple_size = op_attr->values()->size(); + if (unconditional[tuple_size]) + continue; + + std::string pred; + if (op_attr->predicate_values()->empty()) + unconditional[tuple_size] = true; + else + pred = FormatAttributePredicate(op_attr); + + std::string item; + for (int i = 0; i < tuple_size; i++) { + uint32_t val = op_attr->values(i); + if (int lsb = FindLSB(val)) + item += formatv(" values[{0}] = (value & {1:X8}) >> {2};\n", + i, val, lsb); + else + item += formatv(" values[{0}] = value & {1:X8};\n", i, val); + } + + if (!op_attr->predicate_values()->empty()) + item = formatv(" {{\n{0} return true;\n }\n", item); + else + item += " return true;\n"; + sizes[tuple_size] += formatv("{0}{1}", pred, item); + } + } + } + + // If the resource reference indicated a specific size (res:1), make sure + // we found at least one attribute that satisfied that size. If not, it's + // an error. + auto *res = ref.resource(); + if (res->pool_count() > 0) + if (sizes[res->pool_count()].empty()) + subunit->ErrorLog(ref.reference(), + "Incompatible pool size specifier: {0}:{1}", + res->id()->ToString(), res->pool_count()); + + std::string out = " uint64_t value = ins->GetOperand(operand_index);\n"; + + for (int size = 1; size < kMaxPoolCount; size++) { + if (!sizes[size].empty()) { + out += formatv(" if (size == {0}) {{\n{1}", size, sizes[size]); + if (!unconditional[size]) { + for (int i = 0; i < size; i++) + out += formatv(" values[{0}] = value;\n", i); + out += " return true;\n"; + } + out += " }\n"; + } + } + + return out + " return false;\n"; +} + +// Return a string that encodes the name of a function to call if a reference +// specifies an optional mask operation on an operand allocated to a pool. +// Generate the function to fetch, shift and mask the parts of the operand +// that are shared. 
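+// A hypothetical example of a generated value function (masks invented;
+// the declaration matches WritePoolValueFunctions below):
+//   bool VALUE_3(Instr *ins, int operand_index, int size, int values[]) {
+//     uint64_t value = ins->GetOperand(operand_index);
+//     if (size == 2) {
+//       values[0] = value & 0x000000FF;
+//       values[1] = (value & 0x0000FF00) >> 8;
+//       return true;
+//     }
+//     return false;
+//   }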
+std::string OutputState::FormatPoolValues(const ResourceEvent &ref) { + auto *res = ref.resource(); + auto *def = res->definition(); + // Some sanity checking. + if (res->value_name() == nullptr) { + if (def->has_shared_bits() && res->HasCount()) { + if (res->pool_count() > 1 || res->pool_count_name() != nullptr) + spec().ErrorLog(ref.reference(), + "Missing value mask attribute on shared resource"); + } + return kNull; + } + + auto value_name = res->value_name()->name(); + + // If there is an operand reference, check to see if the mask name + // matches an operand attribute with a valid predicate. + auto *opnd_ref = ref.reference()->operand(); + auto *opnd_base = opnd_ref->operand_decl()->operand(); + auto *opnd_def = opnd_ref->operand(); + + // If we can't find a derivation, there's a problem, just return. + OperandDefList opnds; + if (!FindDerivation(opnd_def, opnd_base, opnds)) { + spec().ErrorLog(ref.reference(), "Operand Derivation Panic\n\n"); + return kNull; + } + + // If we don't find the attribute name, it's an error. + OperandAttribute *attr = nullptr; + for (auto *opnd : opnds) + if ((attr = FindAttribute(value_name, opnd, ref.subunit())) != nullptr) + break; + + if (attr == nullptr) { + spec().ErrorLog(ref.reference(), "Invalid value mask name: {0}", + value_name); + return kNull; + } + + // Generate code for the attribute lookup, and return the function name. + auto func = FormatPoolValuesFunction(value_name, ref, opnds, ref.subunit()); + auto index = AddEntry(pool_mask_functions_, func); + return formatv("&{0}", PoolValueFuncName(index)); +} + +// Format a single pool descriptor. +// Generate an autoinitialization for the type: +// struct PoolDescriptor { +// uint8_t pool_id; // which pool to allocate from +// uint8_t pool_size; // how many different allocations sizes +// uint8_t count; // how many pool elements we need +// PoolFunc pool_func; // optional pointer to pool size func +// OpndValueFunc value_func; // optional pointer to pool values func +// uint8_t first; // index of first legal element id +// uint8_t last; // index of last legal element id +// uint8_t width; // width in bits +// }; +std::string OutputState::FormatPoolDescriptor(const ResourceEvent &ref) { + auto *res = ref.resource(); + SubPool pool(res); + auto &subpool_info = res->definition()->sub_pool(pool); + int subpool_size = *subpool_info.counts().rbegin(); + + std::string out = + formatv("{0},{1},{2},{3},{4},{5},{6}", subpool_info.subpool_id(), + subpool_size, FormatPooledCount(ref), FormatPoolValues(ref), + pool.first(), pool.last(), res->definition()->bit_size()); + + // Enter it into a table of pool descriptors, and return a reference to it. + auto index = AddEntry(pool_descriptors_, out); + return formatv("&{0}", PoolDescriptorName(index)); +} + +// Format a group of resources used in a pool request. This is simply a list +// of resource ids for groups or arrays. Enter them into a table so they can +// be shared across pool requests. +std::string OutputState::FormatResourceGroup(const ResourceEvent &ref) { + std::string out; + auto *res = ref.resource(); + + if (res->IsGroupRef()) { + for (auto *member : res->definition()->member_defs()) { + out += std::to_string(member->get_resource_id()) + ","; + } + } else if (res->IsArrayDef()) { + for (int id = res->first(); id <= res->last(); id++) { + out += std::to_string(res->get_resource_id() + id) + ","; + } + } + + out.pop_back(); // throw away trailing comma. + + // Enter it into a table of resource groups, and return a reference to it. 
+ auto index = AddEntry(resource_groups_, out); + return formatv("{0}", ResourceGroupName(index)); +} + +// Format a single pooled resource reference. +// Generate an autoinitialization for the type: +// struct PooledResourceRef { +// struct ResourceRef { +// ReferenceType type; // type of the reference (use, def, ...) +// ReferenceFlags flags; // reference flags +// PipePhase phase; // pipeline phase of the reference +// unsigned int use_cycles; // # cycles a resource is used +// PipeFunc phase_func; // optional pointer to phase function +// ResourceId &resource id[]; // the resources we're referencing +// OperandId operand_index; // operand index for shared resources +// int MicroOps; // MicroOps for an FU entry +// }; +// PoolDescriptor *pool; // pointer to pool descriptor +// }; +std::string +OutputState::FormatPooledResourceReference(const ResourceEvent &ref) { + auto *res = ref.resource(); + + auto pool = FormatPoolDescriptor(ref); + auto group = FormatResourceGroup(ref); + + if (ref.IsFuncUnitRef()) + return formatv("{{{0},{1},{2},{3},{4},{5}}", + FormatReferenceType(ref.ref_type()), + FormatResourceReferenceFlags(ref), ref.use_cycles(), group, + pool, ref.micro_ops()); + + return formatv( + "{{{0},{1},{2},{3},{4},{5},{6}}", FormatReferenceType(ref.ref_type()), + FormatResourceReferenceFlags(ref), FormatPhase(ref.phase_expr()), + ref.use_cycles(), group, res->operand_index(), pool); +} + +// Format a pooled operand reference list. Enter it into a table so it can +// be shared with other subunits. +std::string OutputState::FormatPooledResourceReferences(InstrInfo *info, + OutputSet &output_list, + FormatName name) { + // First write out entries for all unconditional pooled references. + std::string out, previous; + auto *subunit = info->subunit(); + for (auto &ref : info->resources()) + if (ref.resource()->HasCount()) { // Only pooled references + auto resource = FormatPooledResourceReference(ref); + if (resource != previous) { + out += resource + ","; + previous = resource; + } + } + + // Format conditional pooled resource references and FU references. + for (auto *ref : info->resource_refs()) { + if (ref->IsConditionalRef()) { + auto res = + FormatIfElseResourceRef(subunit, ref->conditional_ref(), + RefTypes::kUse, output_list, name, true); + if (res != kNull) + out += "{&" + res + "},"; + } else if (ref->IsFuncUnitRef()) { + out += FormatFuncUnitReference(subunit, ref, true); + } + } + + if (out.empty()) + return kNull; + out.pop_back(); // throw away trailing comma. + + auto index = AddEntry(pooled_resource_refs_, out); + return formatv("&{0}", name(index)); +} + +// Format a single constraint. Return an empty string if no constraint found, +// or if the MDL constraint doesn't further constrain the operand (ie, it is a +// superset of the operand constraint). +std::string OutputState::FormatConstraint(const Reference *ref) { + std::string family = database_->spec().family_name(); + if (auto *opnd = ref->operand()) + if (auto *port = ref->port()) + if (auto *reg_class = port->reg_class()) + if (auto *operand_class = opnd->operand_decl()->reg_class()) + if (!reg_class->IsSupersetOf(operand_class)) + return formatv("{{{0},{1}::{2}RegClassId}", opnd->operand_index(), + family, reg_class->name()); + return ""; +} + +// Find and format a list of constraints. Not all operands have constraints, +// so the resulting string could be empty. 
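+// For illustration (family and class names invented), a single constraint
+// formats as "{<operand index>,<family>::<class>RegClassId}", for example
+// "{2,MyTarget::GPR32RegClassId}": operand 2 is restricted to the GPR32
+// register class. The list below joins such entries with commas and shares
+// the result through the constraints_ table.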
+std::string OutputState::FormatConstraintList(ReferenceList *refs) { + std::string out; + for (auto *ref : *refs) { + auto constraint = FormatConstraint(ref); + if (!constraint.empty()) + out += constraint + ","; + } + if (out.empty()) + return kNull; + out.pop_back(); // throw away trailing comma. + + auto index = AddEntry(constraints_, out); + return ConstraintListName(index); +} + +std::string OutputState::FormatIfElseConstraint(ConditionalRef *cond) { + if (cond == nullptr) + return kNull; + + auto then_refs = FormatConstraintList(&cond->refs()); + if (then_refs != kNull) + forward_constraint_refs_.insert(then_refs); + + // If no constraints were found, and the predicate is null, abort. + if (then_refs == kNull && cond->instr_predicate() == nullptr) + return kNull; + + std::string out; + if (cond->instr_predicate() == nullptr) { + out = formatv("{1},&{0},{1}", then_refs, kNull); + } else { + auto else_refs = FormatIfElseConstraint(cond->else_clause()); + if (else_refs != kNull) + forward_cond_constraint_refs_.insert(else_refs); + + if (else_refs == kNull && then_refs == kNull) + return kNull; + if (else_refs != kNull) + else_refs = "&" + else_refs; + if (then_refs != kNull) + then_refs = "&" + then_refs; + + auto index = FormatPredicate(cond->instr_predicate()); + out = formatv("&{0},{1},{2}", PredicateName(index), then_refs, else_refs); + } + auto index = AddEntry(cond_constraints_, out); + return CondConstraintName(index); +} + +std::string OutputState::FormatPortReferences(InstrInfo *info) { + std::string out; + for (auto *ref : *info->references()) { + if (ref->IsConditionalRef()) { + auto constraint = FormatIfElseConstraint(ref->conditional_ref()); + if (constraint != kNull) + out += "{&" + constraint + "},"; + } else { + auto constraint = FormatConstraint(ref); + if (!constraint.empty()) + out += constraint + ","; + } + } + + if (out.empty()) + return kNull; + out.pop_back(); // throw away trailing comma. + + auto index = AddEntry(constraints_, out); + return formatv("&{0}", ConstraintListName(index)); +} + +// Scan a reference list, marking resource uses which are identical to earlier +// references. This enables a single instruction to reference resources +// several times - in different operands - without impacting hazards and +// bundle packing. +void MarkDuplicateReferences(ResourceList &refs) { + for (auto &ref : refs) + for (auto &old_ref : refs) { + if (&ref == &old_ref) + break; + auto *res = ref.resource(); + auto *old_res = old_ref.resource(); + if (ref.phase_expr()->ToString() != old_ref.phase_expr()->ToString()) + continue; + if (res->get_final_resource_id() != old_res->get_final_resource_id()) + continue; + if (ref.ref_type() != old_ref.ref_type()) + continue; + if (res->HasCount() || old_res->HasCount()) + continue; + int ref_operand_index = res->operand_index(); + int old_operand_index = old_res->operand_index(); + if (ref_operand_index != old_operand_index) { + ref.reference()->SetDuplicate(); + break; + } + } +} + +// Format a single subunit. +std::string OutputState::FormatSubunit(InstrInfo *info) { + // First format all the operand references. + auto operands = FormatOperandReferenceList(info->references()); + + // Sort the references so that they are ordered by phase, then resource id. + // This will speed up bundle packing, since functional units and issue slots + // are the lowest-numbered resources. 
+ std::stable_sort(info->resources().begin(), info->resources().end()); + + MarkDuplicateReferences(info->resources()); + + auto used = FormatResourceReferences( + info, RefTypes::kUse, used_resource_refs_, &UsedResourceListName); + auto held = FormatResourceReferences( + info, RefTypes::kHold, held_resource_refs_, &HeldResourceListName); + auto rsvd = FormatResourceReferences(info, RefTypes::kReserve, + reserved_resource_refs_, + &ReservedResourceListName); + auto pooled = FormatPooledResourceReferences(info, pooled_resource_refs_, + &PooledResourceListName); + auto constraints = FormatPortReferences(info); + + // Its pretty common that most of these fields are null, so write out shorter + // initializations if that's the case (this just saves disk space). + if (held == kNull && rsvd == kNull && pooled == kNull && constraints == kNull) + return formatv("{{{0},{1}}", operands, used); + return formatv("{{{0},{1},{2},{3},{4},{5}}", operands, used, held, rsvd, + pooled, constraints); +} + +// Format a subunit set for an instruction on a single CPU. +std::string OutputState::FormatSubunits(const std::string &instr, + const InstrInfoList &info_list, + const std::string &cpuname) { + std::string out; + for (auto *info : info_list) + if (info->subunit()->func_unit()->cpu()->name() == cpuname) { + out += FormatSubunit(info) + ","; + } + + if (out.empty()) + return kNull; // We didn't find subunits for this CPU. + out.pop_back(); // throw away trailing comma. + + // Not sure if its worthwhile actually sharing these, but its pretty easy. + auto index = AddEntry(subunits_, out); + cpu_instr_subunits_[SubunitsName(cpuname, instr)] = index; + return out; +} + +void OutputState::WriteTable(const OutputSet &objects, const std::string &type, + const std::string &suffix, FormatName name, + const std::string &title, + const std::string &info) { + if (!title.empty()) + output_c() << formatv("{0}// {1} ({2} entries){3}{0}", divider, title, + objects.size(), info); + for (auto &[out, index] : objects) + output_c() << formatv("{0} {1}{2} {{{3}};\n", type, name(index), suffix, + out); +} + +// Helper to figure out how many constructors are included on a a vector +// initialization line. We're just counting leading '{'. Note that some +// vectors don't have any braces. +static int count_vector_init_items(std::string input) { + int count = 0; + for (auto c : input) + if (c == '{') + count++; + return std::max(count, 1); // In case there's just one initializer +} + +void OutputState::WriteVectorTable(const OutputSet &objects, + const std::string &type, FormatName name, + const std::string &title, + const std::string &info) { + if (!title.empty()) + output_c() << formatv("{0}// {1} ({2} entries){3}{0}", divider, title, + objects.size(), info); + for (auto &[out, index] : objects) { + output_c() << formatv("{0} {1}_data[] = {{{2}};\n", type, name(index), out); + output_c() << formatv("{0}Vec {1} = {{ {2}, {1}_data };\n", type, + name(index), count_vector_init_items(out)); + } +} + +void OutputState::WritePhases(const OutputSet &phases, FormatName name) const { + if (phases.empty()) + return; + output_c() << formatv( + "{0}// Functions to compute non-trivial pipeline phase expressions{0}", + divider); + + // Write out any forward declarations that might be needed. 
+ for (auto index : forward_phases_) + output_c() << formatv("unsigned {0}(Instr *ins);\n", name(index)); + output_c() << "\n"; + + for (auto &[phase, index] : phases) + output_c() << formatv("unsigned {0}(Instr *ins) {{\n{1} }\n", name(index), + phase); +} + +void OutputState::WritePoolCountFunctions(const OutputSet &funcs, + FormatName name) const { + if (funcs.empty()) + return; + output_c() << formatv( + "{0}{1}{0}", divider, + "// Functions to compute attribute-based pool size counts"); + + for (auto &[func, index] : funcs) + output_c() << formatv("int {0}(Instr *ins, int operand_index) {{\n{1}}\n", + name(index), func); +} + +void OutputState::WritePoolValueFunctions(const OutputSet &funcs, + FormatName name) const { + if (funcs.empty()) + return; + output_c() << formatv( + "{0}{1}{0}", divider, + "// Functions to fetch and normalize operand values for sharing"); + + for (auto &[func, index] : funcs) + output_c() << formatv("bool {0}(Instr *ins, int operand_index, " + "int size, int values[]) {{\n{1}}\n", + name(index), func); +} + +void OutputState::WritePredicateFunctions(const OutputSet &funcs, + FormatName name, + const std::string &type, + std::fstream &output) const { + if (funcs.empty()) + return; + output << formatv("{0}// {1}{0}", divider, type); + for (auto &[func, index] : funcs) { + output << formatv("bool {0}(Instr *MI) {{\n{1}\n}\n", name(index), func); + } +} + +void OutputState::WriteVirtualPredicateTable(const OutputSet &funcs) const { + output_t() << formatv("{0}// Virtual predicate function table{0}", divider); + output_t() << "std::vector InstrPredicates { "; + for (unsigned i = 0; i < funcs.size(); i++) { + if (i > 0) + output_t() << ", "; + output_t() << "&" << VirtualPredicateName(i); + } + output_t() << "};\n\n"; +} + +// TODO(tbd): Write out a representation of register classes. +// No current architecture currently requires this... +void OutputState::WriteClasses(const OutputSet ®_classes, FormatName name) {} + +// Write out some information about each llvm instruction definition. +// This is only necessary when generating a stand-alone database. +void OutputState::WriteInstructionInfo() const { + if (!generate_llvm_defs_) + return; + if (database_->spec().cpus().empty()) + return; + + std::string out; + std::string family = database_->spec().family_name(); + for (auto *ins : database_->spec().instructions()) { + out += formatv("{{::llvm::{0}::{1},\"{1}\"},\n", family, ins->name()); + if (ins->derived()) + for (auto *derived : *ins->derived()) + out += + formatv("{{::llvm::{0}::{1},\"{1}\"},\n", family, derived->name()); + } + + output_c() << formatv("{0}// Instruction name table ({1} entries){0}", + divider, database_->spec().instructions().size()); + output_c() << formatv("InstructionNameMap InstructionNames = {{\n{0}};\n", + out); +} + +// Write out instruction tables for each defined CPU. +void OutputState::WriteInstructionTables() const { + std::string family = database_->spec().family_name(); + for (auto *cpu : database_->spec().cpus()) { + std::string out; + int instr_count = 0; // Number of instructions for this CPU. 
+ out = formatv(" static SubunitTable table;\n" + " static sys::SmartMutex Mutex;\n" + " sys::SmartScopedLock Lock(Mutex);\n" + " if (table.size() != 0) return &table;\n" + " table.resize(::llvm::{0}::INSTRUCTION_LIST_END, {1});\n", + family, kNull); + + for (auto &[iname, info] : database_->instruction_info()) { + std::string su_name = SubunitsName(cpu->name(), iname); + if (cpu_instr_subunits_.count(su_name)) { + int id = cpu_instr_subunits_.at(su_name); + out += formatv(" table[::llvm::{0}::{1}] = &{2};\n", family, iname, + SubunitListName(id)); + if (info[0]->instruct()->derived()) + for (auto *derived : *info[0]->instruct()->derived()) + out += formatv(" table[::llvm::{0}::{1}] = &{2};\n", family, + derived->name(), SubunitListName(id)); + + instr_count++; + } + } + + output_c() << formatv( + "{0}// Instruction table initialization for {1} ({2} valid entries){0}", + divider, cpu->name(), instr_count); + output_c() << formatv( + "SubunitTable *SUNITS_{0}() {{\n{1} return &table;\n}\n", cpu->name(), + out); + } +} + +// Generate the forwarding table for a single CPU. +std::string OutputState::FormatForwardingInfo(const CpuInstance *cpu, + FwdNetwork &network) { + std::string out; + for (int from = 0; from < network.units(); from++) { + std::string fwd_set_out; + for (int to = 0; to < network.units(); to++) + fwd_set_out += formatv("{0},", network.get(from, to)); + + auto index = AddEntry(forward_sets_, fwd_set_out); + out += formatv("{0},", ForwardSetName(index)); + } + + return formatv("int8_t *FWD_{0}[{1}] = {{ {2} };\n", cpu->name(), + network.units(), out); +} + +// Given a forwarding statement functional unit specifier, look for a match +// for the name in functional unit templates, functional unit groups, and +// functional unit instances. +// Forwarding statements within a cluster definition apply only to that +// cluster. Cpu-level statements apply across all clusters. +std::vector OutputState::FindUnitIds(const CpuInstance *cpu, + const ClusterInstance *fwd_cluster, + const Identifier *name) const { + std::vector units; + auto &spec = database_->spec(); + // If it's a functional unit template, find all instances of that template. + if (spec.fu_map().count(name->name())) { + for (auto *cluster : *cpu->clusters()) + if (cluster == fwd_cluster || fwd_cluster == nullptr) { + for (auto *fu : cluster->fu_instantiations()) + if (fu->func_type()->name() == name->name()) + units.push_back(fu->get_resource()->get_resource_id()); + } + return units; + } + + // If it's a group, find all uses of every member functional unit template. + if (spec.fu_group_map().count(name->name())) { + for (auto *member : *spec.fu_group_map()[name->name()]->members()) { + auto gunits = FindUnitIds(cpu, fwd_cluster, member); + units.insert(units.end(), gunits.begin(), gunits.end()); + } + return units; + } + + // If it's not a template or a group, find a functional unit instance of + // the name. There potentially could be one in each cluster. 
+ for (auto *cluster : *cpu->clusters()) + if (cluster == fwd_cluster || fwd_cluster == nullptr) { + for (auto *fu : cluster->fu_instantiations()) + if (fu->instance()->name() == name->name()) + units.push_back(fu->get_resource()->get_resource_id()); + } + return units; +} + +void OutputState::ExpandForwardStmt(FwdNetwork &network, const CpuInstance *cpu, + const ClusterInstance *cluster, + const ForwardStmt *fwd) const { + auto &spec = database_->spec(); + auto from = fwd->from_unit(); + auto defs = FindUnitIds(cpu, cluster, from); + if (!defs.empty()) { + for (const auto &[to, cycles] : fwd->to_units()) { + auto uses = FindUnitIds(cpu, cluster, to); + if (!uses.empty()) { + for (auto def : defs) + for (auto use : uses) + network.set(def, use, cycles); + } else { + spec.ErrorLog(to, "Invalid functional unit: {0}", to->name()); + } + } + } else { + spec.ErrorLog(from, "Invalid functional unit: {0}", from->name()); + } +} + +// Generate forwarding information tables. For each CPU, use all the +// forward clauses to build a representation of the forwarding network, then +// write out a dense representation of each network. +void OutputState::GenerateForwardingInfo() { + std::vector networks; + for (auto *cpu : database_->spec().cpus()) + if (!cpu->forward_stmts()->empty()) { + FwdNetwork network(cpu->max_fu_id() + 1); + for (auto *fwd : *cpu->forward_stmts()) + ExpandForwardStmt(network, cpu, nullptr, fwd); + for (auto *cluster : *cpu->clusters()) + for (auto *fwd : *cluster->forward_stmts()) + ExpandForwardStmt(network, cpu, cluster, fwd); + networks.push_back(FormatForwardingInfo(cpu, network)); + } + + if (networks.empty()) + return; + output_c() << formatv("{0}// Functional unit forwarding tables.{0}", divider); + + // Write out the networks for each processor. + WriteTable(forward_sets_, "int8_t", "[]", &ForwardSetName, ""); + for (auto &item : networks) + output_c() << item; +} + +// Calculate a "resource factor". This is used by LLVM generic scheduler +// to relate functional unit usage to cycles (unique for each CPU). We +// calculate it here so we don't have to in LLVM. +int CalcResourceFactor(CpuInstance *cpu) { + int factor = std::max(1, cpu->max_issue()); + for (auto size : cpu->fu_pool_sizes()) + factor = std::lcm(factor, size); + return factor; +} + +// Write out the top-level CPU table, which contains pointers to instruction +// tables for each CPU. +void OutputState::WriteCpuList() const { + std::string out, cpu_defs; + for (auto *cpu : database_->spec().cpus()) { + int execute_stage = database_->spec().FindFirstExecutePhase(cpu)->index(); + std::string fwd = cpu->forward_stmts()->empty() + ? kNull + : formatv("&FWD_{0}[0]", cpu->name()); + int resource_factor = CalcResourceFactor(cpu); + + cpu_defs += + formatv("CpuConfig> CPU_{11}(&SUNITS_{11},{12},{13});\n", + cpu->all_resources().back()->get_resource_id(), + cpu->max_used_resource_id(), cpu->max_fu_id(), + cpu->pool_count(), cpu->max_pool_allocation(), + std::max(1, cpu->max_issue()), cpu->reorder_buffer_size(), + execute_stage, cpu->load_phase(), cpu->high_latency_def_phase(), + cpu->max_resource_phase(), cpu->name(), fwd, resource_factor); + + for (const auto &llvm_name : cpu->llvm_names()) + out += formatv(" {{\"{0}\", &CPU_{1} },\n", llvm_name, cpu->name()); + } + + // Write out CPU configurations for each subtarget in the family. 
+ output_c() << formatv("{0}// Family CPU Descriptions.\n" + "// CpuParams:\n" + "// - Total number of defined resources\n" + "// - Maximum \"used\" resource id\n" + "// - Maximum functional unit id\n" + "//\n" + "// - Number of distinct allocation pools\n" + "// - Largest resource pool allocation size\n" + "//\n" + "// - Instruction issue width\n" + "// - Instruction reorder buffer size\n" + "//\n" + "// - First execution pipeline phase\n" + "// - Default load phase\n" + "// - \"High-latency instruction\" write phase\n" + "// - Latest resource use pipeline phase" + "{0}{1}", + divider, cpu_defs); + + // Write out the top-level cpu table for this family. + output_c() << formatv("{0}// Top-level {1} Subtarget Description Table.{0}", + divider, database_->spec().family_name()); + output_c() << formatv("CpuTableDict CpuDict = {{\n{0}};\n\n", out); + output_c() << formatv("CpuTableDef CpuTable = CpuTableDef(CpuDict);\n"); +} + +// Open the output files, abort if unable to do that. +void OutputState::OpenOutputFiles() { + // Split out the input filename and directory. + auto infile = std::filesystem::path(database_->file_name()); + std::string dir_name = infile.parent_path(); + std::string base_name = infile.stem(); + if (!database_->directory_name().empty()) + dir_name = database_->directory_name(); + + auto AddSlash = [](std::string path_name) { + if (!path_name.empty() && path_name.back() != '/') + path_name += "/"; + return path_name; + }; + + // Open the main database output file. + file_name_c_ = formatv("{0}{1}GenMdlInfo.inc", AddSlash(dir_name), base_name); + output_c_ = new std::fstream(file_name_c_, std::fstream::out); + if (!output_c_->is_open()) { + llvm::errs() << formatv("Cannot open output file \"{0}\", aborting\n", + file_name_c_); + exit(EXIT_FAILURE); + } + + // Open the Target library component of the database. + file_name_t_ = + formatv("{0}{1}GenMdlTarget.inc", AddSlash(dir_name), base_name); + output_t_ = new std::fstream(file_name_t_, std::fstream::out); + if (!output_t_->is_open()) { + llvm::errs() << formatv("Cannot open output file \"{0}\", aborting\n", + file_name_t_); + exit(EXIT_FAILURE); + } + + // Open output header file filename. + file_name_h_ = formatv("{0}{1}GenMdlInfo.h", AddSlash(dir_name), base_name); + output_h_ = new std::fstream(file_name_h_, std::fstream::out); + if (!output_h_->is_open()) { + llvm::errs() << formatv("Cannot open output file \"{0}\", aborting\n", + file_name_h_); + exit(EXIT_FAILURE); + } +} + +// Write out headers to the C and H output files. +void OutputState::WriteHeader() { + auto infile = std::filesystem::path(database_->file_name()); + std::string cpu_name = infile.stem(); + output_c() << formatv("{0}// Machine Description Database.\n" + "// This file is auto-generated, do not edit.{1}\n", + divider + 1, divider); + + if (!generate_llvm_defs_) { + output_c() << "#include \"llvm/Support/Mutex.h\"\n"; + output_c() << "#include \"llvm/MC/MDLInfo.h\"\n"; + output_c() << "#include \"llvm/MC/MDLInstrInfo.h\"\n"; + output_c() << "#include \"" + cpu_name + "InstrInfo.h\"\n"; + } + output_c() << formatv("#include \"{0}\"\n\n", file_name_h_); + + output_t() << formatv( + "{0}// Machine Description Database: Target library components\n" + "// This file is auto-generated, do not edit.{1}", + divider + 1, divider); + output_t() + << "// This file contains MDL predicate functions that call Target\n" + "// library functions. 
Since MDL lives in MC, and MC is included\n" + "// in programs that may NOT include the Target library, we need\n" + "// to virtualize these." + << divider; + output_t() << "#include \"llvm/MC/MDLInfo.h\"\n"; + output_t() << "#include \"llvm/MC/MDLInstrInfo.h\"\n\n"; + output_t() << "#include \"" + cpu_name + "InstrInfo.h\"\n"; + + output_h() << formatv("#ifndef {0}_MACHINE_DESCRIPTION_DATABASE\n", cpu_name); + output_h() << formatv("#define {0}_MACHINE_DESCRIPTION_DATABASE\n", cpu_name); + output_h() << formatv("{0}// Machine Description Database.\n" + "// This file is auto-generated, do not edit.{1}", + divider + 1, divider); + output_h() << "#include \n"; + output_h() << "#include \n"; +} + +void OutputState::WriteTrailer() { + auto infile = std::filesystem::path(database_->file_name()); + std::string cpu_name = infile.stem(); + output_h() << formatv("\n#endif // {0}_MACHINE_DESCRIPTION_DATABASE\n", + cpu_name); +} + +// Write out some global statistics about the spec: +// - Maximum number of resources (across CPUs, for each CPU). +// - Maximum resource id used in RefUse operations. +// - Maximum pipeline phase used by RefUse operations. +// - Maximum number of instructions that can be issued in parallel. +// - Maximum number of pools (across CPUs, for each CPU). +// - Maximum pool allocation size across CPUs. +// NOTE: These are worst-case numbers across all family members, we may +// want to write out CPU-specific versions for compiler performance reasons. +void OutputState::WriteSpecDefinitions() { + MdlSpec &spec = database_->spec(); + int max_res = 0; + int max_use_res = 0; + int max_phase = 0; + int max_issue = 0; + int max_pools = 0; + int max_pool_alloc = 0; + + // Find that absolute worst-case pipeline phase. + int max_phase_worst_case = 0; + for (auto *pipe : spec.pipe_phases()) + max_phase_worst_case = + std::max(max_phase_worst_case, pipe->phase_names()->back()->index()); + + // Find maximum number of resources across CPUs. + for (auto *cpu : spec.cpus()) + max_res = std::max(max_res, cpu->all_resources().back()->get_resource_id()); + + // Find maximum number of pools across CPUs. + for (auto *cpu : spec.cpus()) + max_pools = std::max(max_pools, cpu->pool_count()); + + // Find the maximum pool allocation size across CPUs. + for (auto *cpu : spec.cpus()) { + int cpu_max_alloc = 0; + for (auto *def : cpu->pool_resources()) + if (!def->alloc_sizes().empty()) + cpu_max_alloc = std::max(cpu_max_alloc, *def->alloc_sizes().rbegin()); + cpu->set_max_pool_allocation(cpu_max_alloc); + max_pool_alloc = std::max(max_pool_alloc, cpu_max_alloc); + } + + // Find maximum resource use phase for each CPU. + // Also find the maximum resource id used in RefUse rules (for all CPUs). + for (auto *cpu : spec.cpus()) { + int max_cpu_phase = 0; + int max_cpu_use_res = 0; + for (auto *res : cpu->all_resources()) + if (res->ref_types() & (RefTypes::kUse | RefTypes::kFus)) { + if (res->phase_expr_seen()) + max_cpu_phase = max_phase_worst_case; + else + max_cpu_phase = std::max(max_cpu_phase, res->latest_ref()); + + // If referencing a group or pool, we need to note all the pool or + // group members as explicitly used. 
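+ // For a resource group, every member's id counts as used; for a pooled
+ // resource, the members occupy a contiguous id range, so the last used
+ // id is the base id plus (pool size - 1).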
+ if (res->IsGroupDef()) { + for (auto *member : res->member_defs()) + max_cpu_use_res = + std::max(max_cpu_use_res, member->get_resource_id()); + } else { + int res_id = res->get_resource_id(); + if (res->pool_size() > 0) + res_id += res->pool_size() - 1; + max_cpu_use_res = std::max(max_cpu_use_res, res_id); + } + } + + cpu->set_max_resource_phase(max_cpu_phase); + max_phase = std::max(max_phase, max_cpu_phase); + + cpu->set_max_used_resource_id(max_cpu_use_res); + max_use_res = std::max(max_use_res, max_cpu_use_res); + } + + // Find maximum issue size for each CPU. + // Count issue slots for each cluster. If a cluster doesn't have issue + // slots, count functional unit instantiations. + for (auto *cpu : spec.cpus()) { + int issue = 0; + for (auto *clst : *cpu->clusters()) + issue += !clst->issues()->empty() ? clst->issues()->size() + : clst->func_units()->size(); + cpu->set_max_issue(issue); + max_issue = std::max(max_issue, issue); + } + + // Find the earliest pipeline phase referenced by name in a "use" clause + // for each CPU. + for (auto &[instr_name, info_list] : database_->instruction_info()) { + for (auto *info : info_list) { + auto *cpu = info->subunit()->func_unit()->cpu(); + int min_use = cpu->early_use_phase(); + for (auto *ref : *info->references()) { + if (ref->operand() && ref->ref_type() == RefTypes::kUse) { + if (auto *phase = ref->phase_expr()->GetPhaseName()) { + if (min_use == -1) + min_use = phase->index(); + else + min_use = std::min(min_use, phase->index()); + } + } + } + cpu->set_early_use_phase(min_use); + } + } + output_h() << formatv("\nnamespace llvm {{\nnamespace {0} {{\n", + spec.family_name()); + + output_h() << formatv("{0}// Global constant definitions{0}", divider); + output_h() << formatv("const int kMaxResourceId = {0};\n", max_res); + output_h() << formatv("const int kMaxUsedResourceId = {0};\n", max_use_res); + output_h() << formatv("const int kMaxPipePhase = {0};\n", max_phase); + output_h() << formatv("const int kMaxIssue = {0};\n", max_issue); + output_h() << formatv("const int kMaxPools = {0};\n", max_pools); + output_h() << formatv("const int kMaxPoolCount = {0};\n", max_pool_alloc); +} + +// Write out definitions we expect LLVM tablegen to create: +// - instruction ids. +// - register ids. +// - register class ids. +// This function will be unnecessary after integration with LLVM. +void OutputState::WriteLLVMDefinitions() { + if (!generate_llvm_defs_) + return; + + MdlSpec &spec = database_->spec(); + int id = 0; + output_h() << formatv("{0}// LLVM Instruction defs{0}", divider); + output_h() << " enum {\n"; + + for (auto *instr : spec.instructions()) { + output_h() << formatv(" {0}, // {1}\n", instr->name(), id++); + if (instr->derived()) + for (auto *dinstr : *instr->derived()) + output_h() << formatv(" {0}, // {1}\n", dinstr->name(), id++); + } + output_h() << formatv(" INSTRUCTION_LIST_END, // {0}\n", id++); + + output_h() << formatv("{0}// Register defs{0}", divider); + for (auto *reg : spec.registers()) + output_h() << formatv(" {0}, // {1}\n", reg->name(), id++); + + output_h() << formatv("{0}// Register class def{0}", divider); + for (auto *reg_class : spec.reg_classes()) + output_h() << formatv(" {0}RegClassId, // {1}\n", reg_class->name(), + id++); + + output_h() << formatv("\n }; // enum\n"); +} + +// Format the fully qualified name of a resource. 
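+// e.g. "<family>::<cpu>::<cluster>::<name>"; the cluster component is
+// omitted for resources in the null cluster.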
+std::string ResourceName(MdlSpec &spec, CpuInstance *cpu, + ClusterInstance *cluster, ResourceDef *res) { + std::string name = formatv("{0}::{1}::", spec.family_name(), cpu->name()); + if (cluster && !cluster->IsNull()) + name += formatv("{0}::", cluster->name()); + name += res->name(); + return name; +} + +// Write out a single resource definition. +void AddResourceDef(std::string *out, MdlSpec &spec, CpuInstance *cpu, + ClusterInstance *cluster, ResourceDef *res, + std::string note) { + std::string prefix = " "; + if (cluster && !cluster->IsNull()) + prefix += " "; + + // Note: we don't need to print out groups. + if (!res->IsGroupDef()) + *out += formatv("{0} const int {1} = {2}; // {3}\n", prefix, + res->name(), res->get_resource_id(), + ResourceName(spec, cpu, cluster, res), note); +} + +// Write out definitions for functional unit and issue slot resources. +void OutputState::WriteResourceDefinitions() { + MdlSpec &spec = database_->spec(); + std::string out; + + for (auto *cpu : spec.cpus()) { + out += + formatv("{0}// Resource Definitions for {1}{0}", divider, cpu->name()); + out += formatv(" namespace {0} {{\n", cpu->name()); + for (auto *res : *cpu->resources()) + AddResourceDef(&out, spec, cpu, nullptr, res, "resource"); + + // Write out resources associated with a cluster. + for (auto *cluster : *cpu->clusters()) { + if (!cluster->IsNull()) + out += formatv("\n namespace {0} {{\n", cluster->name()); + + // Write out functional unit resource definitions. We only write names + // for top-level functional unit resources, and don't write out + // catchall units. + for (auto *fu : cluster->fu_instantiations()) { + if (fu->parent() == nullptr && !fu->instance()->is_catchall_unit()) { + auto *res = fu->get_resource(); + AddResourceDef(&out, spec, cpu, cluster, res, "func unit"); + } + } + for (auto *issue : *cluster->issues()) + AddResourceDef(&out, spec, cpu, cluster, issue, "issue slot"); + for (auto *res : *cluster->resources()) + AddResourceDef(&out, spec, cpu, cluster, res, "resource"); + if (!cluster->IsNull()) + out += formatv(" } // namespace {0}\n", cluster->name()); + } + out += formatv(" } // namespace {0}\n", cpu->name()); + } + + output_h() << "\nnamespace MdlResources {\n"; + output_h() << out; + output_h() << "} // namespace MdlResources\n"; +} + +// Write external definitions to the output header file. +void OutputState::WriteExterns() { + std::string family = database_->spec().family_name(); + output_h() << formatv("{0}// External definitions{0}", divider); + if (generate_llvm_defs_) { + output_h() << "using InstructionNameMap = std::map;\n"; + output_h() << formatv("extern InstructionNameMap InstructionNames;\n", + family); + } + output_h() << formatv("extern llvm::mdl::CpuTableDef CpuTable;\n\n", family); + output_h() << formatv("} // namespace {0}\n} // namespace llvm\n", family); +} + +// Top level function for writing out the machine description. +void OutputState::WriteCpuTable() { + for (auto [instr_name, info] : database_->instruction_info()) + for (auto *cpu : database_->spec().cpus()) + FormatSubunits(instr_name, info, cpu->name()); + + // Collect and write out overall spec parameters after processing all the + // CPUs' subunits. 
+ WriteSpecDefinitions(); + + output_c() << formatv("\nnamespace llvm {{\nnamespace {0} {{\n", + database_->spec().family_name()); + output_t() << formatv("\nnamespace llvm {{\nnamespace {0} {{\n", + database_->spec().family_name()); + output_c() << formatv("using namespace ::llvm::mdl;\n"); + output_t() << formatv("using namespace ::llvm::mdl;\n"); + output_c() << formatv("constexpr auto {0} = nullptr;\n", kNull); + + // Generate some constants for reference types. + output_c() << "constexpr auto RefPred = ReferenceTypes::RefPred;\n"; + output_c() << "constexpr auto RefUse = ReferenceTypes::RefUse;\n"; + output_c() << "constexpr auto RefDef = ReferenceTypes::RefDef;\n"; + output_c() << "constexpr auto RefKill = ReferenceTypes::RefKill;\n"; + output_c() << "constexpr auto RefUseDef = ReferenceTypes::RefUseDef;\n"; + output_c() << "constexpr auto RefHold = ReferenceTypes::RefHold;\n"; + output_c() << "constexpr auto RefReserve = ReferenceTypes::RefReserve;\n"; + output_c() << "constexpr auto RefFus = ReferenceTypes::RefFus;\n"; + + WritePredicateFunctions(reference_predicates_, &PredicateName, + "Predicate functions", output_c()); + WritePredicateFunctions(virtual_ref_predicates_, &VirtualPredicateName, + "Virtual predicate functions", output_t()); + WriteVirtualPredicateTable(virtual_ref_predicates_); + + WritePhases(phases_, &PhaseName); + WritePoolCountFunctions(pooled_count_functions_, &PooledCountFuncName); + WritePoolValueFunctions(pool_mask_functions_, &PoolValueFuncName); + + // Write out forward references for conditional references. + output_c() << formatv( + "{0}// Forward references for conditional references{0}", divider); + for (auto name : forward_opnd_refs_) + output_c() << formatv("extern OperandRefVec {0};\n", name); + if (!forward_opnd_refs_.empty()) + output_c() << "\n"; + for (auto name : forward_cond_opnd_refs_) + output_c() << formatv("extern ConditionalRef {0};\n", name); + if (!forward_cond_opnd_refs_.empty()) + output_c() << "\n"; + + for (auto name : forward_resource_refs_) + output_c() << formatv("extern ResourceRefVec {0};\n", name); + if (!forward_resource_refs_.empty()) + output_c() << "\n"; + for (auto name : forward_cond_res_refs_) + output_c() << formatv("extern ConditionalRef {0};\n", name); + if (!forward_cond_res_refs_.empty()) + output_c() << "\n"; + + for (auto name : forward_pooled_refs_) + output_c() << formatv("extern PooledResourceRefVec {0};\n", name); + if (!forward_pooled_refs_.empty()) + output_c() << "\n"; + for (auto name : forward_cond_pool_refs_) + output_c() << formatv("extern ConditionalRef {0};\n", + name); + if (!forward_cond_pool_refs_.empty()) + output_c() << "\n"; + + for (auto name : forward_constraint_refs_) + output_c() << formatv("extern OperandConstraintVec {0};\n", name); + if (!forward_constraint_refs_.empty()) + output_c() << "\n"; + for (auto name : forward_cond_constraint_refs_) + output_c() << formatv("extern ConditionalRef {0};\n", + name); + if (!forward_cond_constraint_refs_.empty()) + output_c() << "\n"; + + output_c() + << divider + << "// Conditional Reference Tables:\n" + "// - Predicate function (optional)\n" + "// - References (operands, resource or pooled resource refs\n" + "// - \"Else\" clause conditional reference (optional)" + << divider; + + WriteTable(cond_operand_refs_, "ConditionalRef", "", + &CondReferenceName, "Conditional Operand Reference Table"); + WriteTable(cond_resource_refs_, "ConditionalRef", "", + &CondResourceReferenceName, + "Conditional Resource Reference Table"); + 
WriteTable(cond_pooled_resource_refs_, "ConditionalRef", + "", &CondPooledResourceReferenceName, + "Conditional Pooled Resource Reference Table"); + WriteTable(cond_constraints_, "ConditionalRef", "", + &CondConstraintName, "Conditional Constraints Table"); + + WriteVectorTable(operand_refs_, "OperandRef", &OperandListName, + "Operand Reference Table", + "\n// - Resource type(use, def, cond)\n" + "// - Reference flags (protected, unprotected)\n" + "// - Pipeline phase\n" + "// - Pipeline phase function (optional)\n" + "// - Operand index\n" + "// or (for conditional references)\n" + "// - Conditional reference"); + + output_c() + << divider + << "// Resource Reference Tables:\n" + "// - Reference type (use, hold, reserve)\n" + "// - Reference flags (protected, unprotected, reserved)\n" + "// - Pipeline phase\n" + "// - Pipeline phase function (optional)\n" + "// - Used cycles\n" + "// - Resource id\n" + "// - Operand index (for shared resources)\n" + "// - Width in bits (for shared resources)\n" + "// or (for functional unit descriptors)\n" + "// - Reference type (fus)\n" + "// - Reference flags (reserved, buffered, begin_group, ...)\n" + "// - Used cycles\n" + "// - Resource id\n" + "// - Number of MicroOps\n" + "// or (for \"unitless\" micro-ops)\n" + "// - Reference type (fus)\n" + "// - Number of MicroOps\n" + "// or (for conditional references)\n" + "// - Conditional reference" + << divider; + + WriteVectorTable(used_resource_refs_, "ResourceRef", &UsedResourceListName, + "Used Resource Reference Table"); + WriteVectorTable(held_resource_refs_, "ResourceRef", &HeldResourceListName, + "Held Resource Reference Table"); + WriteVectorTable(reserved_resource_refs_, "ResourceRef", + &ReservedResourceListName, + "Reserved Resource Reference Table"); + + WriteTable(resource_groups_, "ResourceIdType", "[]", &ResourceGroupName, + "Resource Group Table"); + + auto pool_descriptor_table = + "\n" + "// pool_id - which pool to allocate from\n" + "// pool_size - how many different allocations sizes\n" + "// count - how many pool elements we need\n" + "// pool_func - optional pointer to pool size func\n" + "// value_func - optional pointer to pool values func\n" + "// first - index of first legal element id\n" + "// last - index of last legal element id\n" + "// width - width in bits"; + + WriteTable(pool_descriptors_, "PoolDescriptor", "", &PoolDescriptorName, + "Pool Descriptor Table", pool_descriptor_table); + + auto pooled_refs_header = + "\n" + "// - Resource type (use, hold, reserve)\n" + "// - Reference flags (protected, unprotected, reserved\n" + "// - Pipeline phase\n" + "// - Pipeline phase function (optional)\n" + "// - Used cycles\n" + "// - Resource group\n" + "// - Operand index (for shared resources)\n" + "// - Pool descriptor\n" + "// or (for functional unit descriptors)\n" + "// - Reference type (fus)\n" + "// - Reference flags (reserved, buffered, begin_group, ...)\n" + "// - Used cycles\n" + "// - Group id\n" + "// - Pool id\n" + "// - Number of MicroOps"; + + WriteVectorTable(pooled_resource_refs_, "PooledResourceRef", + &PooledResourceListName, "Pooled Resource Reference Table", + pooled_refs_header); + + auto constraint_table_header = "\n" + "// - Operand Index\n" + "// - Constraint id\n" + "// or (for conditional reference)\n" + "// - Conditional constraint name\n"; + + WriteVectorTable(constraints_, "OperandConstraint", &ConstraintListName, + "Operand Constraint Table", constraint_table_header); + + WriteVectorTable(subunits_, "Subunit", &SubunitListName, "Subunit Table"); + + 
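+ // Finally, emit the per-instruction subunit tables, the functional unit
+ // forwarding networks, and the top-level CPU table for this family.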
WriteInstructionTables(); + WriteInstructionInfo(); + GenerateForwardingInfo(); + WriteCpuList(); + + output_c() << formatv("} // namespace {0}\n} // namespace llvm\n\n", + database_->spec().family_name()); + output_t() << formatv("} // namespace {0}\n} // namespace llvm\n\n", + database_->spec().family_name()); +} + +} // namespace mdl +} // namespace mpact diff --git a/llvm/utils/MdlCompiler/mdl_predicate.cpp b/llvm/utils/MdlCompiler/mdl_predicate.cpp new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/mdl_predicate.cpp @@ -0,0 +1,819 @@ +//===- mdl_predicate.cpp - Process Tablegen predicates --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines functions that process reference predicates. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include "mdl.h" +#include "mdl_output.h" + +namespace mpact { +namespace mdl { + +//--------------------------------------------------------------------------- +// Attempt to evaluate a user-defined predicate over a specific instruction. +// We try to evaluate all of the predicate at MDL build time. Anything that +// cannot be evaluated will be evaluated at compile time. So the goal is to +// prune as much of the expression as possible, leaving things that we will +// need to generate compile-time predicate code for. +//--------------------------------------------------------------------------- +using PredFunc = PredExpr *(MdlSpec::*)(PredExpr *pred, + const InstructionDef *def); + +// Table mapping predicate expression types to string names. +std::string PredExpr::PredName() { + static auto *pred_name = new std::unordered_map( + {{PredOp::kTrue, kTrue}, + {PredOp::kFalse, kFalse}, + {PredOp::kEmpty, kEmpty}, + {PredOp::kCheckAny, kCheckAny}, + {PredOp::kCheckAll, kCheckAll}, + {PredOp::kCheckNot, kCheckNot}, + {PredOp::kCheckOpcode, kCheckOpcode}, + {PredOp::kCheckIsRegOperand, kCheckIsRegOperand}, + {PredOp::kCheckIsImmOperand, kCheckIsImmOperand}, + {PredOp::kCheckZeroOperand, kCheckZeroOperand}, + {PredOp::kCheckFunctionPredicate, kCheckFunctionPredicate}, + {PredOp::kCheckFunctionPredicateWithTII, kCheckFunctionPredicateWithTII}, + {PredOp::kCheckNumOperands, kCheckNumOperands}, + {PredOp::kCheckRegOperand, kCheckRegOperand}, + {PredOp::kCheckInvalidRegOperand, kCheckInvalidRegOperand}, + {PredOp::kCheckImmOperand, kCheckImmOperand}, + {PredOp::kCheckSameRegOperand, kCheckSameRegOperand}, + {PredOp::kOpcodeSwitchStmt, kOpcodeSwitchStmt}, + {PredOp::kOpcodeSwitchCase, kOpcodeSwitchCase}, + {PredOp::kReturnStatement, kReturnStatement}}); + return (*pred_name)[opcode_]; +} + +PredExpr *MdlSpec::EvaluatePredicate(std::string name, + const InstructionDef *instr) { + if (predicate_table_.count(name) == 0) + return nullptr; + return EvaluatePredicate(predicate_table_[name], instr); +} + +PredExpr *MdlSpec::EvaluatePredicate(PredExpr *pred, + const InstructionDef *instr) { + // Table of operation-to-evaluation functions. 
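+ // Opcodes without an entry here (switch cases, return statements, and
+ // leaf operands such as numbers and strings) are returned unevaluated.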
+ static auto *pred_ops = new std::unordered_map({ + {PredOp::kTrue, &MdlSpec::PredSimple}, + {PredOp::kFalse, &MdlSpec::PredSimple}, + {PredOp::kEmpty, &MdlSpec::PredSimple}, + {PredOp::kName, &MdlSpec::PredEvalName}, + {PredOp::kCheckAny, &MdlSpec::PredCheckAny}, + {PredOp::kCheckAll, &MdlSpec::PredCheckAll}, + {PredOp::kCheckNot, &MdlSpec::PredCheckNot}, + {PredOp::kCheckOpcode, &MdlSpec::PredCheckOpcode}, + {PredOp::kCheckIsRegOperand, &MdlSpec::PredCheckIsReg}, + {PredOp::kCheckRegOperand, &MdlSpec::PredCheckReg}, + {PredOp::kCheckInvalidRegOperand, &MdlSpec::PredCheckInvalidReg}, + {PredOp::kCheckSameRegOperand, &MdlSpec::PredCheckSameReg}, + {PredOp::kCheckNumOperands, &MdlSpec::PredCheckNumOperand}, + {PredOp::kCheckIsImmOperand, &MdlSpec::PredCheckIsImm}, + {PredOp::kCheckImmOperand, &MdlSpec::PredCheckImm}, + {PredOp::kCheckZeroOperand, &MdlSpec::PredCheckZero}, + {PredOp::kCheckFunctionPredicate, &MdlSpec::PredSimple}, + {PredOp::kCheckFunctionPredicateWithTII, &MdlSpec::PredSimple}, + {PredOp::kOpcodeSwitchStmt, &MdlSpec::PredOpcodeSwitchStmt}, + }); + + PredOp opcode = pred->opcode(); + if (pred_ops->count(opcode)) + return (this->*(*pred_ops)[opcode])(pred, instr); + + return pred; +} + +// Look up a predicate by name, and return the associated predicate. +// If the predicate maps to a name, recur on that name. +PredExpr *MdlSpec::LookupPredicate(PredExpr *pred) { + if (!IsValidInstructionPredicate(pred->value())) { + ErrorLog(pred, "Undefined predicate: {0}", pred->value()); + return new PredExpr(PredOp::kFalse); + } + + auto *item = predicate_table_[pred->value()]; + if (item->opcode() == PredOp::kName) + return LookupPredicate(item); + return item; +} + +// Evaluate a named predicate. Since CheckNots aren't propagated through +// named predicates, we need to handle negates explicitly. +PredExpr *MdlSpec::PredEvalName(PredExpr *pred, const InstructionDef *instr) { + auto *item = LookupPredicate(pred); + auto *result = EvaluatePredicate(item, instr); + if (pred->negate()) + return PredSimplify(new PredExpr(*pred, PredOp::kCheckNot, result)); + + return result; +} + +// Logical OR operator on child predicates: +// - immediately return True on a predicate that evaluates to True. +// - discard any predicates that evaluate to False. +// - if the predicate can't be completely evaluated, add to result set. +PredExpr *MdlSpec::PredCheckAny(PredExpr *pred, const InstructionDef *instr) { + std::vector result; + for (auto *or_op : pred->operands()) { + auto *item = EvaluatePredicate(or_op, instr); + if (item->IsTrue()) + return item; + if (!item->IsFalse()) + result.push_back(item); + } + // If we didn't find True or partially evaluated predicates, return False. + if (result.empty()) + return new PredExpr(PredOp::kFalse); + // If we only found one partially evaluated predicate, just return it. + if (result.size() == 1) + return result[0]; + // If there is more than one predicate, return an OR of them. + return new PredExpr(*pred, PredOp::kCheckAny, result); +} + +// Logical AND operator on child predicates: +// - immediately return False on a predicate that evaluates to False. +// - discard any predicates that evaluate to True. +// - if the predicate can't be completely evaluated, add to result set. 
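+// For example, AND(True, P) simplifies to P and AND(P, False) simplifies to
+// False; only predicates that remain unresolved are kept for compile time.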
+PredExpr *MdlSpec::PredCheckAll(PredExpr *pred, const InstructionDef *instr) {
+ std::vector<PredExpr *> result;
+ for (auto *and_op : pred->operands()) {
+ auto *item = EvaluatePredicate(and_op, instr);
+ if (item->IsFalse())
+ return item;
+ if (!item->IsTrue())
+ result.push_back(item);
+ }
+ // If we didn't find True or partially evaluated predicates, return True.
+ if (result.empty())
+ return new PredExpr(PredOp::kTrue);
+ // If we only found one partially evaluated predicate, just return it.
+ if (result.size() == 1)
+ return result[0];
+ // If there is more than one predicate, return an AND of them.
+ return new PredExpr(*pred, PredOp::kCheckAll, result);
+}
+
+// Logical NOT operator on the child predicate.
+// kCheckNot operators are almost always simplified away, so when evaluating one
+// we need to preserve it in the expression unless the child is simplified to
+// true or false.
+PredExpr *MdlSpec::PredCheckNot(PredExpr *pred, const InstructionDef *instr) {
+ auto *item = EvaluatePredicate(pred->operands()[0], instr);
+ if (item->IsFalse())
+ return new PredExpr(PredOp::kTrue);
+ if (item->IsTrue())
+ return new PredExpr(PredOp::kFalse);
+ return new PredExpr(*pred, PredOp::kCheckNot, item);
+}
+
+// Check for a particular opcode. This always returns either true or false.
+PredExpr *MdlSpec::PredCheckOpcode(PredExpr *pred,
+ const InstructionDef *instr) {
+ for (auto *opcode : pred->operands())
+ if (opcode->value() == instr->name())
+ return new PredExpr(PredOp::kTrue, pred->negate());
+ return new PredExpr(PredOp::kFalse, pred->negate());
+}
+
+// If a predicate operand is a predicate index, look up the operand and
+// check it for validity. Return -1 if it's invalid.
+// If the operand is an operand reference, look it up and return its index.
+int MdlSpec::PredOperandIndex(const PredExpr *pred,
+ const InstructionDef *instr) {
+ // Predicate operand indexes are flattened operand indexes.
+ if (pred->opcode() == PredOp::kNumber) {
+ int index = std::stoi(pred->value());
+ int num_operands = instr->num_flat_operands();
+ bool ellipsis = instr->has_ellipsis();
+ return (index < num_operands || ellipsis) ? index : -1;
+ }
+
+ if (pred->opcode() == PredOp::kOperandRef)
+ return FindOperandName(instr, *pred->opnd()->op_names(), RefTypes::kNull);
+ return -1;
+}
+
+// Check if a specified operand is a register operand. We look for register
+// class operands or a register name. If we reference a defined operand, we
+// can always determine if it's a register or not. If it refers to a variadic
+// operand, we have to generate a compile-time test.
+PredExpr *MdlSpec::PredCheckIsReg(PredExpr *pred, const InstructionDef *instr) {
+ int index = PredOperandIndex(pred->operands()[0], instr);
+ if (index == -1)
+ return new PredExpr(PredOp::kFalse, pred->negate());
+ if (index >= instr->num_flat_operands()) {
+ if (instr->has_ellipsis())
+ return pred;
+ return new PredExpr(PredOp::kFalse, pred->negate());
+ }
+
+ // If it's a valid operand, we can always determine whether or not it is a
+ // register operand.
+ auto *opnd = (*instr->flat_operands())[index];
+ auto type = opnd->base_type()->name();
+ if (!reg_class_map().count(type) && FindItem(registers(), type) == nullptr)
+ return new PredExpr(PredOp::kFalse, pred->negate());
+ return new PredExpr(PredOp::kTrue, pred->negate());
+}
+
+// Check if a specific register operand is an invalid register. We usually
+// need to generate a compile-time check for this, but can do some sanity
+// checking at compiler build time. 
+PredExpr *MdlSpec::PredCheckInvalidReg(PredExpr *pred, + const InstructionDef *instr) { + int index = PredOperandIndex(pred->operands()[0], instr); + if (index == -1) + return new PredExpr(PredOp::kFalse, pred->negate()); + if (index >= instr->num_flat_operands()) { + if (instr->has_ellipsis()) + return pred; + return new PredExpr(PredOp::kFalse, pred->negate()); + } + + // If the operand type is a named register, then it can't be invalid. + auto *opnd = (*instr->flat_operands())[index]; + auto type = opnd->base_type()->name(); + if (FindItem(registers(), type)) + return new PredExpr(PredOp::kFalse, pred->negate()); + + return pred; +} + +// Check if an operand is a specific register. There are several cases we +// can handle at compiler build time: +// - If the declared operand has a register name, we can match it against the +// specified register name. +// - If the declared operand is a register class, we can check whether the +// specified register name is NOT in that class. +// - If it's an invalid operand index, we can return kFalse. +// In all other cases, we need to generate a compile-time test. +PredExpr *MdlSpec::PredCheckReg(PredExpr *pred, const InstructionDef *instr) { + int index = PredOperandIndex(pred->operands()[0], instr); + if (index == -1) + return new PredExpr(PredOp::kFalse, pred->negate()); + if (index >= instr->num_flat_operands()) { + if (instr->has_ellipsis()) + return pred; + return new PredExpr(PredOp::kFalse, pred->negate()); + } + + // If we have a custom function to call, we can't evaluate it. + if (pred->operands().size() == 3) + return pred; + + auto reg_name = pred->operands()[1]->value(); + auto *opnd = (*instr->flat_operands())[index]; + auto type = opnd->base_type()->name(); + + // If the operand type is a register, see if it matches the specified name. + if (FindItem(registers(), type)) { + auto opcode = (type == reg_name) ? PredOp::kTrue : PredOp::kFalse; + return new PredExpr(opcode, pred->negate()); + } + + // If the operand type is a register class, see if the specified name is NOT + // in the class. +#ifdef HAVE_REGISTER_OVERLAP_INFORMATION + // We currently can't do this if the target has overlapping classes. + // This is specified in the td files, but we don't currently reflect this + // information in the machine description. + // TODO(tdb): Scrape overlapping register information from the td files, + // and use that information here. + if (reg_class_map().count(type) && + !FindItem(*reg_class_map()[type]->members(), reg_name)) + return new PredExpr(PredOp::kFalse, pred->negate()); +#endif + + return pred; +} + +// In general, we need to do a runtime test unless the indexes are invalid. +// We -could- check for cases involving literal register operands and/or +// non-intersecting register classes. +PredExpr *MdlSpec::PredCheckSameReg(PredExpr *pred, + const InstructionDef *instr) { + int index0 = PredOperandIndex(pred->operands()[0], instr); + if (index0 == -1) + return new PredExpr(PredOp::kFalse, pred->negate()); + if (index0 >= instr->num_flat_operands()) + return instr->has_ellipsis() ? pred + : new PredExpr(PredOp::kFalse, pred->negate()); + int index1 = PredOperandIndex(pred->operands()[1], instr); + if (index1 == -1) + return new PredExpr(PredOp::kFalse, pred->negate()); + if (index1 >= instr->num_flat_operands()) + return instr->has_ellipsis() ? pred + : new PredExpr(PredOp::kFalse, pred->negate()); + + // Make sure they're both register operands. 
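+ // If either operand can never hold a register, the two can never match,
+ // so the check is resolved here instead of emitting a compile-time test.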
+ auto *opnd0 = (*instr->flat_operands())[index0];
+ auto type0 = opnd0->base_type()->name();
+ auto *opnd1 = (*instr->flat_operands())[index1];
+ auto type1 = opnd1->base_type()->name();
+
+ bool reg0 = FindItem(registers(), type0) || reg_class_map().count(type0);
+ bool reg1 = FindItem(registers(), type1) || reg_class_map().count(type1);
+ if (!reg0 || !reg1)
+ return new PredExpr(PredOp::kFalse, pred->negate());
+
+ return pred;
+}
+
+// Check that an instruction has a specified number of operands.
+// If the instruction has variadic operands, we generally need to generate a
+// compile-time test.
+PredExpr *MdlSpec::PredCheckNumOperand(PredExpr *pred,
+ const InstructionDef *instr) {
+ int index = std::stoi(pred->operands()[0]->value());
+ int num_operands = instr->num_flat_operands();
+ bool has_ellipsis = instr->has_ellipsis();
+
+ if (index < num_operands)
+ return new PredExpr(PredOp::kFalse, pred->negate());
+ if (has_ellipsis)
+ return pred;
+ auto opcode = (index != num_operands) ? PredOp::kFalse : PredOp::kTrue;
+ return new PredExpr(opcode, pred->negate());
+}
+
+// Check that an operand has a specific immediate value. There are several
+// things we can check:
+// - If the operand is a register operand, we can return kFalse.
+// - If it's an invalid operand index, we can return kFalse.
+// Otherwise we generate a compile-time check.
+PredExpr *MdlSpec::PredCheckIsImm(PredExpr *pred, const InstructionDef *instr) {
+ int index = PredOperandIndex(pred->operands()[0], instr);
+ if (index == -1)
+ return new PredExpr(PredOp::kFalse, pred->negate());
+ if (index >= instr->num_flat_operands())
+ return instr->has_ellipsis() ? pred
+ : new PredExpr(PredOp::kFalse, pred->negate());
+
+ // Check for register operands?
+ return pred;
+}
+
+// We generally need a compile-time check to look for specific immediate values,
+// so for now we just check that it's a valid immediate operand.
+PredExpr *MdlSpec::PredCheckImm(PredExpr *pred, const InstructionDef *instr) {
+ return PredCheckIsImm(pred, instr);
+}
+
+// Ditto for PredCheckIsImm.
+PredExpr *MdlSpec::PredCheckZero(PredExpr *pred, const InstructionDef *instr) {
+ return PredCheckIsImm(pred, instr);
+}
+
+// When we evaluate an OpcodeSwitchStmt against a single instruction we can
+// trivially simplify the opcode-based switch statement to a single case and
+// return statement.
+PredExpr *MdlSpec::PredOpcodeSwitchStmt(PredExpr *pred,
+ const InstructionDef *instr) {
+ for (auto *cases : pred->operands()) {
+ // If we encounter a named predicate, find its associated predicate, which
+ // needs to be either a switch case or a return statement.
+ if (cases->opcode() == PredOp::kName)
+ cases = LookupPredicate(cases);
+
+ // We expect just SwitchCases and ReturnStatements. We handle these two
+ // cases inline, since they have a particular semantic we need
+ // to implement.
+ if (cases->opcode() == PredOp::kOpcodeSwitchCase) {
+ if (EvaluatePredicate(cases->operands()[0], instr)->IsTrue())
+ return EvaluatePredicate(cases->operands()[1]->operands()[0], instr);
+ continue;
+ }
+ // A ReturnStatement is the switch Default. Just evaluate and return its
+ // underlying predicate.
+ if (cases->opcode() == PredOp::kReturnStatement)
+ return EvaluatePredicate(cases->operands()[0], instr);
+
+ // If the predicate isn't a SwitchCase or return statement, we have a
+ // poorly defined switch statement, so complain. 
+ ErrorLog(pred, "Malformed switch predicate"); + } + return new PredExpr(PredOp::kFalse, pred->negate()); +} + +// Write out a predicate expression for debug. +std::string PredExpr::ToString(int indent) { + auto sep = ""; + std::string out = formatv("{0}{1}{2}", std::string(indent * 2 + 2, ' '), + negate() ? "!" : "", PredName()); + + switch (opcode()) { + case PredOp::kTrue: + case PredOp::kFalse: + case PredOp::kEmpty: + return out; + + case PredOp::kOpcodeSwitchStmt: + case PredOp::kOpcodeSwitchCase: + case PredOp::kReturnStatement: + case PredOp::kCheckAny: + case PredOp::kCheckAll: + case PredOp::kCheckNot: + out += "<"; + for (auto *opnd : operands()) + out += formatv("\n{0}", opnd->ToString(indent + 1)); + return out + ">"; + + case PredOp::kCheckOpcode: + out += " ["; + for (auto *opnd : operands()) { + out += formatv("{0}{1}", sep, opnd->ToString(-1)); + sep = ", "; + } + out += "]"; + return out; + + case PredOp::kCheckIsRegOperand: + case PredOp::kCheckIsImmOperand: + case PredOp::kCheckZeroOperand: + case PredOp::kCheckNumOperands: + case PredOp::kCheckRegOperand: + case PredOp::kCheckInvalidRegOperand: + case PredOp::kCheckImmOperand: + case PredOp::kCheckSameRegOperand: + case PredOp::kCheckFunctionPredicate: + case PredOp::kCheckFunctionPredicateWithTII: + out += "<"; + for (auto *opnd : operands()) { + out += formatv("{0}{1}", sep, opnd->ToString(-1)); + sep = ", "; + } + return out + ">"; + + case PredOp::kOperandRef: + return opnd()->ToString(); + case PredOp::kString: + case PredOp::kNumber: + case PredOp::kName: + case PredOp::kCode: + return out + value(); + } + return "Error"; +} + +// Simplify all predicates registered in the predicate table. +void MdlSpec::SimplifyPredicates() { + for (auto [name, pred] : predicate_table_) + predicate_table_[name] = PredSimplify(pred); +} + +// Simplify predicates if possible. In particular we want to propagate +// negate operators (kCheckNot) down the expression. +PredExpr *MdlSpec::PredSimplify(PredExpr *expr) { + auto &operands = expr->operands(); + + switch (expr->opcode()) { + // For Any/All case, if negated, reverse opcode and negate all operands. + case PredOp::kCheckAny: + case PredOp::kCheckAll: + if (expr->negate()) { + PredOp op = (expr->opcode() == PredOp::kCheckAll) ? PredOp::kCheckAny + : PredOp::kCheckAll; + expr->set_opcode(op); + for (auto *operand : expr->operands()) + operand->set_negate(); + } + for (unsigned i = 0; i < operands.size(); i++) + operands[i] = PredSimplify(operands[i]); + expr->reset_negate(); + + // If they only have one operand, just return the single operand. + if (expr->operands().size() == 1) + return expr->operands()[0]; + return expr; + + // For NOT case, negate operand, and simplify it. + case PredOp::kCheckNot: + if (!expr->negate()) + operands[0]->set_negate(); + expr->reset_negate(); + return PredSimplify(operands[0]); + + case PredOp::kTrue: + if (expr->negate()) + expr->set_opcode(PredOp::kFalse); + expr->reset_negate(); + return expr; + case PredOp::kFalse: + if (expr->negate()) + expr->set_opcode(PredOp::kTrue); + expr->reset_negate(); + return expr; + + case PredOp::kOpcodeSwitchStmt: + case PredOp::kOpcodeSwitchCase: + case PredOp::kReturnStatement: + for (unsigned i = 0; i < operands.size(); i++) + operands[i] = PredSimplify(operands[i]); + return expr; + + default: + return expr; + } + return expr; +} + +//----------------------------------------------------------------------------- +// Predicate function generation code. 
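+// Predicates that could not be fully resolved at build time are lowered to
+// C++ boolean expressions over the candidate instruction and emitted into
+// the generated database.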
+//----------------------------------------------------------------------------- + +// Top level interface for generating a function to evaluate a predicate. +std::string OutputState::FormatPredicateFunc(PredExpr *expr) { + auto &operands = expr->operands(); + + switch (expr->opcode()) { + case PredOp::kCheckAny: + return expr->CheckCompound(this); + case PredOp::kCheckAll: + return expr->CheckCompound(this); + case PredOp::kCheckNot: + return formatv("!({0})", FormatPredicateFunc(operands[0])); + + case PredOp::kCheckIsRegOperand: + case PredOp::kCheckIsImmOperand: + return expr->OperandType(); + + case PredOp::kCheckInvalidRegOperand: + return expr->InvalidRegOperand(); + case PredOp::kCheckRegOperand: + return expr->RegOperand(spec().family_name()); + case PredOp::kCheckSameRegOperand: + return expr->SameRegOperand(); + case PredOp::kCheckImmOperand: + return expr->ImmOperand(); + case PredOp::kCheckZeroOperand: + return expr->ImmZeroOperand(); + case PredOp::kCheckFunctionPredicate: + return expr->FunctionPredicate(false, this); + case PredOp::kCheckFunctionPredicateWithTII: + return expr->FunctionPredicate(true, this); + case PredOp::kCheckNumOperands: + return expr->NumOperands(); + + case PredOp::kCode: + return expr->CheckCode(this); + case PredOp::kName: { + std::string out = + FormatPredicateFunc(spec().predicate_table()[expr->value()]); + if (expr->negate()) + return formatv("!({0})", out); + return out; + } + + // These should be all resolved, and don't need to be formatted. + case PredOp::kOpcodeSwitchStmt: + case PredOp::kOpcodeSwitchCase: + case PredOp::kCheckOpcode: + case PredOp::kReturnStatement: + case PredOp::kNumber: + case PredOp::kString: + default: + return "ERROR!!!!"; + + case PredOp::kTrue: + return "true"; + case PredOp::kFalse: + return "false"; + case PredOp::kEmpty: + return "empty"; + } + + return ""; +} + +std::string PredExpr::GetOperand(PredExpr *index) const { + return formatv("MI->getOperand({0})", index->value()); +} + +std::string PredExpr::OperandType() const { + PredExpr *index = operands_[0]; + auto type = (opcode_ == PredOp::kCheckIsRegOperand) ? "isOpndRegister" + : "isOpndLiteral"; + auto op = negate() ? "!" : ""; + return formatv("{0}MI->{1}({2})", op, type, index->value()); +} + +std::string PredExpr::InvalidRegOperand() const { + PredExpr *index = operands_[0]; + auto op = negate() ? "!=" : "=="; + return formatv("{0} {1} 0", GetOperand(index), op); +} + +std::string PredExpr::RegOperand(const std::string &family) const { + PredExpr *index = operands_[0]; + PredExpr *reg = operands_[1]; + auto func = (operands_.size() == 3) ? operands_[2]->value() : ""; + if (!func.empty()) + func = func.substr(1, func.length() - 2); + + auto getReg = GetOperand(index); + if (!func.empty()) + getReg = formatv("{0}({1})", func, getReg); + auto op = negate() ? "!=" : "=="; + auto val = formatv("{0}::{1}", family, reg->value()); + if (reg->value().empty()) + val = "0"; + return formatv("{0} {1} {2}", getReg, op, val); +} + +std::string PredExpr::SameRegOperand() const { + PredExpr *reg0 = operands_[0]; + PredExpr *reg1 = operands_[1]; + + auto op = negate() ? "!=" : "=="; + return formatv("{0} {1} {2}", GetOperand(reg0), op, GetOperand(reg1)); +} + +std::string PredExpr::ImmOperand() const { + PredExpr *index = operands_[0]; + PredExpr *value = operands_[1]; + auto func = (operands_.size() == 3) ? 
operands_[2]->value() : ""; + if (!func.empty()) + func = func.substr(1, func.length() - 2); + + auto getImm = GetOperand(index); + if (!func.empty()) + getImm = formatv("{0}({1})", func, getImm); + auto val = value->value(); + if (val[0] == '\"') + val = val.substr(1, val.length() - 2); + if (val.empty()) + return formatv("{0}{1}", negate() ? "!" : "", getImm); + + auto op = negate() ? "!=" : "=="; + return formatv("{0} {1} {2}", getImm, op, val); +} + +std::string PredExpr::ImmZeroOperand() const { + PredExpr *index = operands_[0]; + + auto getImm = GetOperand(index); + auto op = negate() ? "!=" : "=="; + return formatv("{0} {1} 0", getImm, op); +} + +std::string PredExpr::NumOperands() const { + auto op = negate() ? "!=" : "=="; + return formatv("MI->getMI()->getNumOperands() {0} {1}", op, + operands_[0]->value()); +} + +std::string PredExpr::CheckCompound(OutputState *spec) { + std::string out; + std::string sep = ""; + std::string op = (opcode() == PredOp::kCheckAll) ? " && " : " || "; + + for (auto *operand : operands()) { + out += formatv("{0}{1}", sep, spec->FormatPredicateFunc(operand)); + sep = op; + } + return formatv("({0})", out); +} + +//---------------------------------------------------------------------- +// This is a huge kludge to cope with the "PredicateProlog" tablegen +// hack to communicate the target base class name to the predicate +// function. Currently only three targets use this feature. +//---------------------------------------------------------------------- +// Note an alternative approach would be to require a "using" clause +// in the Subtarget.cpp (that includes the generated file) that +// specifies the target object name, ie ARMBaseInstrInfo, etc. +//---------------------------------------------------------------------- +// Another approach would be to parse the PredicateProlog record in +// TdScan and pass that information through the generated MDL file. +// Since its potentially arbitrary C++ code, that could be tricky. +//---------------------------------------------------------------------- +static std::string InstrInfoName(const std::string &family) { + if (family == "ARM") + return "ARMBaseInstrInfo"; + if (family == "AArch64") + return "AArch64InstrInfo"; + if (family == "AMDGPU") + return "SIInstrInfo"; + return formatv("{0}InstrInfo", family); +} + +// Given an input string and an offset, find the next identifier in the string +// and return it. The "loc" parameter points to the end of the identifier +static std::string FindId(std::string input, size_t &loc) { + // Find next alphabetic character. + char ch; + std::string result; + for (ch = input[loc]; ch && !(isalpha(ch) || ch == '_'); ch = input[++loc]) { + } + for (; ch && (isalnum(ch) || ch == '_'); ch = input[++loc]) + result.push_back(ch); + return result; +} + +static std::string ExpandVariables(std::string body, std::string family, + bool &TII_seen) { + // Fetch the target's InstrInfo name (from the PredicateProlog record). + std::string tii = + formatv("static_cast(MI->getTII())", InstrInfoName(family)); + + // Replace references to MI with Instr object references. Replace TII + // with the target's InstrInfo name. + size_t loc = 0; + for (auto id = FindId(body, loc); !id.empty(); id = FindId(body, loc)) { + if (id == "MI") { + body = body.insert(loc, "->getMI()"); + loc += 6; + } else if (id == "TII") { + body = body.replace(loc - 3, 3, tii); + loc += tii.size() - 3; + TII_seen = true; + } + } + return body; +} + +// Code predicates must work with MachineInstr AND MCInst objects. 
We need +// to replace references to (*MI) and (MI) with a reference to the object's +// machine instruction pointer. +std::string PredExpr::CheckCode(OutputState *spec) const { + bool TII_seen = false; + std::string input = value(); + std::string body = input.substr(2, input.length() - 4); + + body = ExpandVariables(body, spec->spec().family_name(), TII_seen); + + // Create the body of the virtual function, add it to the virtual function + // table, and generate a call to that function via its index. + std::string neg = negate() ? "!" : ""; + std::string out = formatv("{0}({1})", neg, body); + if (!TII_seen) + return "(MI->isMI() && " + out + ")"; + auto vfunc = formatv(" return {0};", out); + auto index = OutputState::AddEntry(spec->virtual_ref_predicates(), vfunc); + return formatv("(MI->isMI() && MI->evaluatePredicate({0}))", index); +} + +std::string PredExpr::FunctionPredicate(bool withTII, OutputState *spec) const { + // If withTII is specified, we need to pass target information to the + // function. For Machine instructions, this is a TII-> prefix. For MCInst + // versions, we pass an extra parameter. + std::string tii; + std::string mcii = withTII ? ", MI->getMCII())" : ")"; + + if (withTII) { + if (operands_.size() == 3 && operands_[2]->value() != "\"TII\"") { + tii = operands_[2]->value(); + tii = tii.substr(1, tii.length() - 2); // Strip quotes + tii += "->"; + } else { + // Fetch the target's InstrInfo name (from the PredicateProlog record). + tii = formatv("static_cast(MI->getTII())->", + InstrInfoName(spec->spec().family_name())); + } + } + + std::string neg = negate() ? "!" : ""; + + auto MCfunc = operands_[0]->value(); // MCInst function + if (!MCfunc.empty()) + MCfunc = MCfunc.substr(1, MCfunc.length() - 2); + if (!MCfunc.empty()) + MCfunc = MCfunc + "(*MI->getMC()" + mcii; + + auto MIfunc = operands_[1]->value(); // MachineInstr function + if (!MIfunc.empty()) + MIfunc = MIfunc.substr(1, MIfunc.length() - 2); + if (!MIfunc.empty()) + MIfunc = tii + MIfunc + "(*MI->getMI())"; + + if (MIfunc.empty()) + return formatv("(MI->isMC() && {0}{1})", neg, MCfunc); + + // Create the body of the virtual function, add it to the virtual function + // table, and generate a call to that function via its index. + auto vfunc = formatv(" return {0};", MIfunc); + auto index = OutputState::AddEntry(spec->virtual_ref_predicates(), vfunc); + MIfunc = formatv("MI->evaluatePredicate({0})", index); + + if (MCfunc.empty()) + return "(MI->isMI() && " + MIfunc + ")"; + + return neg + "(MI->isMC() ? " + MCfunc + " : " + MIfunc + ")"; +} + +} // namespace mdl +} // namespace mpact diff --git a/llvm/utils/MdlCompiler/mdl_util.cpp b/llvm/utils/MdlCompiler/mdl_util.cpp new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/mdl_util.cpp @@ -0,0 +1,1423 @@ +//===- mdl_util.cpp - Miscellaneous utilities for MDL error checking ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Definition of methods that do basic semantic checking on the input mdl. +// - Check for duplicate definitions (templates, resources, etc) +// - For each template, make sure its bases exist and have compatible +// parameters. +// - For each instantiation, make sure its template exists and has +// compatible parameters. 
+// +// As part of error checking, we link various components together so that +// later passes don't have to repeatedly perform name lookups: +// - link (fu/su/lat) instances to their associated templates. +// - link instance arguments to their associated template parameters. +// - link templates to their base templates. +// +// Note that we don't do ALL name lookups here, since this is done in +// template instantiation. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +#include "mdl.h" +#include "llvm/Support/Error.h" + +namespace mpact { +namespace mdl { + +// Create some "Null" resource definitions. +ResourceRef *NullResourceRef = nullptr; +RegisterClass *NullRegisterClass = nullptr; +ResourceDef *NullPortDef = nullptr; + +//---------------------------------------------------------------------------- +// Basic error logging. +//---------------------------------------------------------------------------- +void Abort() { + llvm::errs() << "Errors found, aborting\n"; + exit(EXIT_FAILURE); +} + +void MdlSpec::WriteMessage(const MdlItem *item, const std::string &msg) { + if (item == nullptr) { + llvm::errs() << formatv("Error: {0}\n", msg); + return; + } + std::string message = formatv("{0} {1}", item->Location(), msg); + // If we've already see this exact message, don't print it out again! + // This is fairly common, since latencies and subunits are potentially + // instantiated many times. + if (!error_messages_.count(message)) { + error_messages_.insert(message); + llvm::errs() << message << "\n"; + } +} + +int SubUnitInstantiation::ErrorsSeen() const { return spec()->ErrorsSeen(); } +int FuncUnitInstantiation::ErrorsSeen() const { return spec()->ErrorsSeen(); } + +//---------------------------------------------------------------------------- +// Pipeline Phase expression methods. +//---------------------------------------------------------------------------- +// Find the phase reference in an expression and return it. +PhaseName *PhaseExpr::GetPhaseName() const { + PhaseName *name; + if (operation() == kPhase) + return phase_name_; + if (left() && (name = left()->GetPhaseName())) + return name; + if (right() && (name = right()->GetPhaseName())) + return name; + return nullptr; +} + +// Early check to see if there's at least one symbolic name in a phase expr. +bool PhaseExpr::HasPhaseName() const { + if (operation() == kPhase) + return true; + if (left() && left()->HasPhaseName()) + return true; + if (right() && right()->HasPhaseName()) + return true; + return false; +} + +// Return true if an expression doesn't contain any operand references. +bool PhaseExpr::IsExpressionConstant() const { + if (left() && !left()->IsExpressionConstant()) + return false; + if (right() && !right()->IsExpressionConstant()) + return false; + return operation() != kOpnd; +} + +// Return true if an expression is "legal". Currently this means it doesn't +// have any provable divisions by 0, and if it's a constant expression it +// returns a valid value at the top level. +bool PhaseExpr::IsExpressionLegal() const { + if (!IsSubexpressionLegal()) + return false; + if (IsExpressionConstant() && EvaluateConstantExpression() < 0) + return false; + return true; +} + +// Return true if a subexpression is "legal". This checks for division by 0. 
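+// Only provable divisions by zero are rejected, i.e. a constant divisor
+// that evaluates to 0.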
+bool PhaseExpr::IsSubexpressionLegal() const { + if (left() && !left()->IsSubexpressionLegal()) + return false; + if (right() && !right()->IsSubexpressionLegal()) + return false; + if (operation() == kDiv) + if (right()->IsExpressionConstant() && + right()->EvaluateConstantExpression() == 0) + return false; + return true; +} + +// Evaluate a constant expression. Note: this code assumes that the +// expression has been determined to be both constant and "legal", so +// division by zero doesn't need to be checked here. +int PhaseExpr::EvaluateConstantExpression() const { + int left_value, right_value; + if (left()) + left_value = left()->EvaluateConstantExpression(); + if (right()) + right_value = right()->EvaluateConstantExpression(); + + if (operation() == kPlus) + return left_value + right_value; + if (operation() == kMinus) + return left_value - right_value; + if (operation() == kMult) + return left_value * right_value; + if (operation() == kDiv) + return left_value / right_value; + if (operation() == kNeg) + return -left_value; + if (operation() == kPositive) + return std::max(0, left_value); + if (operation() == kInt) + return number(); + if (operation() == kPhase) + return phase_id(); + return 0; // Cannot reach here. +} + +//---------------------------------------------------------------------------- +// Make sure the overall specification has some basic components defined. +// We do this rather late in the semantic checking phase, because some of +// the components are auto-generated during semantic checking. +//---------------------------------------------------------------------------- +void MdlSpec::CheckInputStructure() { + if (family_name_.empty()) + ErrorLog(nullptr, "Specify a processor family name"); + if (cpus_.empty()) + ErrorLog(nullptr, "Specify at least one cpu definition"); + if (pipe_phases_.empty()) + ErrorLog(nullptr, "Specify at least one pipeline definition."); + if (func_units_.empty()) + ErrorLog(nullptr, "Specify at least one functional unit definition."); + if (subunits_.empty()) + ErrorLog(nullptr, "Specify at least one subunit definition."); + if (latencies_.empty()) + ErrorLog(nullptr, "Specify at least one latency definition."); + if (instructions_.empty()) + ErrorLog(nullptr, "Specify at least one instruction definition."); + + if (ErrorsSeen()) + Abort(); +} + +//---------------------------------------------------------------------------- +// Initialize some global subobjects of the MdlSpec object: +// - Create dictionaries for functional units, subunits, latencies, operands, +// instructions, groups, and register classes. Check for duplicates. +// - Build instantiation objects for subunits. +// - Initialize some global null object descriptors. +//---------------------------------------------------------------------------- +void MdlSpec::BuildDictionaries() { + for (auto *fu : func_units()) + if (!fu_map().emplace(fu->name(), fu).second) + ErrorLog(fu, "Duplicate functional unit definition: {0}", fu->name()); + + // Group names cannot conflict with themselves or functional unit names. 
+  for (auto *group : func_unit_groups()) {
+    if (!fu_group_map().emplace(group->name(), group).second)
+      ErrorLog(group, "Duplicate functional unit group definition: {0}",
+               group->name());
+    if (FindItem(fu_map(), group->name()) != nullptr)
+      ErrorLog(group, "Group name conflicts with functional unit name: {0}",
+               group->name());
+  }
+
+  for (auto *su : subunits())
+    if (!su_map().emplace(su->name(), su).second)
+      ErrorLog(su, "Duplicate subunit definition: {0}", su->name());
+
+  for (auto *lat : latencies())
+    if (!lat_map().emplace(lat->name(), lat).second)
+      ErrorLog(lat, "Duplicate latency definition: {0}", lat->name());
+
+  for (auto *opnd : operands())
+    if (!operand_map().emplace(opnd->name(), opnd).second)
+      ErrorLog(opnd, "Duplicate operand definition: {0}", opnd->name());
+
+  for (auto *instr : instructions())
+    if (!instruction_map().emplace(instr->name(), instr).second)
+      ErrorLog(instr, "Duplicate instruction definition: {0}", instr->name());
+
+  for (auto *rclass : reg_classes())
+    if (!reg_class_map().emplace(rclass->name(), rclass).second)
+      ErrorLog(rclass, "Duplicate register class definition: {0}",
+               rclass->name());
+
+  // Initialize a vector of instantiations for every subunit template.
+  for (auto *su : subunits())
+    su_instantiations()[su->name()] = new std::vector<SubUnitInstantiation *>;
+
+  // Create some "Null" resource definitions.
+  NullResourceRef = new ResourceRef("__");
+  NullRegisterClass = new RegisterClass("__");
+  NullPortDef = new ResourceDef("__");
+
+  // Scan over all CPU specs, collect feasible predicate names.
+  FindValidPredicateNames();
+}
+
+//---------------------------------------------------------------------------
+// Look for implicitly defined functional unit templates, and create a
+// definition for them.  Implicitly defined functional units can occur in
+// any CPU definition as a functional unit instance, or as a base of a
+// functional unit template, or as a member of a group.
+// Note that we DON'T allow implicit template definitions to have parameters.
+// Also note that we will check if these are ever associated with any
+// subunits - if they are not, it's probably an error.
+//---------------------------------------------------------------------------
+void MdlSpec::FindImplicitFuncUnitTemplates() {
+  // Check each functional unit instance in each CPU definition.
+  for (auto *cpu : cpus())
+    for (auto *cluster : *cpu->clusters())
+      for (auto *unit : *cluster->func_units()) {
+        auto *type = unit->type();
+        if (FindItem(fu_map_, type->name()) == nullptr) {
+          auto *fu = new FuncUnitTemplate(type);
+          fu_map_.emplace(type->name(), fu);
+          func_units_.push_back(fu);
+        }
+      }
+
+  // Check that each base of a functional unit template is defined. If it's not,
+  // then define it as an implicitly defined functional unit.
+  for (auto [name, unit] : fu_map_) {
+    for (auto *base : *unit->bases())
+      if (FindItem(fu_map_, base->name()) == nullptr) {
+        auto *fu = new FuncUnitTemplate(base);
+        func_units_.push_back(fu);
+        fu_map_.emplace(base->name(), fu);
+      }
+  }
+
+  // Check all the names in an FU group. They should either refer to a group,
+  // or a functional unit. If they are undefined, report an error.
+  for (auto [name, group] : fu_group_map())
+    for (auto *id : *group->members())
+      if (FindItem(fu_map_, id->name()) == nullptr)
+        ErrorLog(id, "Undefined functional unit reference: {0}", id->name());
+}
+
+//---------------------------------------------------------------------------
+// Create a set of all feasible names for predicates.
If not found in +// this list, a predicate use will elicit a warning. We do this because +// a misspelled predicate will *always* fail silently, so we want to find +// predicates that can -never- be true. This isn't strictly an error, but +// it is most likely a typo. +// We allow three kinds of predicates: +// - defined CPU names +// - Instantiated functional unit names +// - Functional unit template names. +//--------------------------------------------------------------------------- +void MdlSpec::FindValidPredicateNames() { + for (auto *cpu : cpus()) { + valid_predicate_names_.emplace(cpu->name()); + for (auto *cluster : *cpu->clusters()) + for (auto *fu_inst : *cluster->func_units()) + if (fu_inst->id()) + valid_predicate_names_.emplace(fu_inst->name()); + } + + for (auto *func_template : func_units()) + valid_predicate_names_.emplace(func_template->name()); +} + +//--------------------------------------------------------------------------- +// Check that a predicate name is at least feasible. If it is not, generate +// a warning. +//--------------------------------------------------------------------------- +void MdlSpec::IsValidPredicateName(const Identifier *name) { + if (!valid_predicate_names_.count(name->name())) + WarningLog(name, "Predicate is invalid: {0}", name->name()); +} + +//--------------------------------------------------------------------------- +// Sanity check the input for duplicate definitions. We do this before +// any resource promotions so that we don't get lots of duplicate error +// messages. Any errors found here are considered fatal, so just abort. +//--------------------------------------------------------------------------- +// NOTES: +// - Things we have dictionaries for (functional units, subunits, latencies, +// operands, instructions, groups, and register classes) are checked against +// themselves when they are created. They may still need to be checked +// against other types of objects. +// - Not all of the namespaces interfere. CPUs, clusters, and templates +// (functional units, subunits, and latencies) have their own scope. +// - Resource member lists have their own namespaces, but names must be +// unique within that space. +//--------------------------------------------------------------------------- +void MdlSpec::CheckForDuplicateDefs() { + // Check that phase names groups and phase names are unique. + CheckPhaseDefinitions(&pipe_phases()); + + // Check that globally defined resources, registers and classes are unique. + FindDuplicates(resources()); + FindDuplicates(registers()); + FindDuplicates(registers(), resources()); + FindDuplicates(resources(), reg_classes()); + FindDuplicates(registers(), reg_classes()); + FindDuplicateMembers(resources()); + + // Operand names cannot conflict with registers or register class names. + FindDuplicates(reg_classes(), operands()); + FindDuplicates(registers(), operands()); + + // For functional unit templates, look for redefinitions of parameters, + // registers, resources, or ports. We do allow locally defined resources and + // ports to hide globally defined objects. + for (auto *funit : func_units()) { + FindDuplicates(*funit->params()); + FindDuplicates(*funit->resources()); + FindDuplicates(*funit->ports()); + FindDuplicateMembers(*funit->resources()); + + // Don't allow redefinitions across object types (ports, resources). + FindDuplicates(*funit->ports(), *funit->resources()); + + // Don't allow shadowing of template parameters. 
+ FindDuplicates(*funit->resources(), *funit->params()); + FindDuplicates(*funit->ports(), *funit->params()); + } + + // For subunit and latency templates, check for unique parameters. + for (auto *sunit : subunits()) + FindDuplicates(*sunit->params()); + for (auto *lat : latencies()) + FindDuplicates(*lat->params()); + + // CPU's have a separate namespace, but redefinitions aren't allowed. + // For each cpu definition, check for redefinitions of registers, + // resources, and functional unit instance names. + // For each cluster, check cluster names, register/resource names, and + // functional unit instances. + FindDuplicates(cpus()); + for (auto *cpu : cpus()) { + // Check that phase names groups and phase names are unique. + // Note that these can mask globally defined phase definitions. + CheckPhaseDefinitions(cpu->pipe_phases()); + + // Make sure CPU definitions aren't masking globally defined things. + FindDuplicates(*cpu->resources()); + FindDuplicates(*cpu->resources(), resources()); + FindDuplicates(*cpu->resources(), registers()); + FindDuplicates(*cpu->resources(), reg_classes()); + FindDuplicates(*cpu->clusters()); + FindDuplicateMembers(*cpu->resources()); + + for (auto *cluster : *cpu->clusters()) { + FindDuplicates(*cluster->resources()); + FindDuplicates(*cluster->func_units()); + FindDuplicates(*cluster->issues()); + FindDuplicates(*cluster->issues(), *cluster->func_units()); + FindDuplicates(*cluster->issues(), *cluster->resources()); + FindDuplicates(*cluster->resources(), *cpu->resources()); + FindDuplicates(*cluster->resources(), resources()); + FindDuplicates(*cluster->resources(), registers()); + FindDuplicates(*cluster->resources(), reg_classes()); + FindDuplicateMembers(*cluster->resources()); + + // If this is a generated (promoted) cluster, we also check this + // cluster's definitions against CPU-level definitions. + if (cluster->IsNull()) { + FindDuplicates(*cluster->issues(), *cpu->resources()); + FindDuplicates(*cluster->issues(), *cpu->clusters()); + FindDuplicates(*cluster->func_units(), *cpu->resources()); + FindDuplicates(*cluster->func_units(), *cpu->clusters()); + } + } + } + + // For each instruction definition, check for duplicate operand names. + for (auto *instruct : instructions()) + FindDuplicates(*instruct->operands()); + + // For each operand definition, check for duplicate (sub)operand names and + // duplicate attribute definitions. + for (auto *operand : operands()) + FindDuplicates(*operand->operands()); +} + +//--------------------------------------------------------------------------- +// Look up a phase name in the pipeline definition set. First look for +// CPU-specific phases, and if not found look for a global definition. +// Return null if it's not found anywhere. +//--------------------------------------------------------------------------- +PhaseName *MdlSpec::SearchPipeReference(Identifier *phase, CpuInstance *cpu) { + if (cpu) + for (auto *p1 : *cpu->pipe_phases()) + if (auto *item = FindItem(*p1->phase_names(), phase->name())) + return item; + + for (auto *p1 : pipe_phases()) + if (auto *item = FindItem(*p1->phase_names(), phase->name())) + return item; + + return nullptr; +} + +//--------------------------------------------------------------------------- +// Look up a phase name in the pipeline definition set. +// Print an error message if not found. +// Return a pointer to the object if found, or a fake object if not found. 
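+// Returning a placeholder object (rather than nullptr) lets later checks
+// proceed without null tests once the error has been logged.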
+//--------------------------------------------------------------------------- +PhaseName *MdlSpec::FindPipeReference(Identifier *phase, CpuInstance *cpu) { + if (auto *item = SearchPipeReference(phase, cpu)) + return item; + + // This is ultimately a fatal error. Return a fake object. + ErrorLog(phase, "Pipeline phase \"{0}\" not found for cpu: {1}", + phase->name(), cpu->name()); + return new PhaseName(*phase, "", false, false); +} + +//--------------------------------------------------------------------------- +// Check that phase names groups and phase names are unique. +//--------------------------------------------------------------------------- +void MdlSpec::CheckPhaseDefinitions(PipeDefList *pipes) { + FindDuplicates(*pipes); + + for (auto *p1 : *pipes) { + FindDuplicates(*p1->phase_names()); + for (auto *p2 : *pipes) + if (p1 != p2) + FindDuplicates(*p1->phase_names(), *p2->phase_names()); + } +} + +//--------------------------------------------------------------------------- +// Specialize a phase expression for the context its instantiated in. +// Return true if at least one valid pipeline phase is found. +//--------------------------------------------------------------------------- +bool MdlSpec::SpecializePhaseExpr(PhaseExpr *expr, CpuInstance *cpu) { + if (expr->operation() == kPhase) { + auto *phase = FindPipeReference(expr->phase(), cpu); + expr->set_phase_name(phase); + return !phase->name().empty(); + } + + bool found = false; + if (expr->left()) + found |= SpecializePhaseExpr(expr->left(), cpu); + if (expr->right()) + found |= SpecializePhaseExpr(expr->right(), cpu); + return found; +} + +//--------------------------------------------------------------------------- +// If a resource definition has a start_phase or end_phase specified, +// look them up in the pipe phase definitions. +//--------------------------------------------------------------------------- +void MdlSpec::CheckPipeReference(ResourceDef *def, CpuInstance *cpu) { + if (def->start_phase() != nullptr) + FindPipeReference(def->start_phase(), cpu); + if (def->end_phase() != nullptr) + FindPipeReference(def->end_phase(), cpu); +} + +//--------------------------------------------------------------------------- +// Quick check that reference phases contain at least one phase name. +//--------------------------------------------------------------------------- +void MdlSpec::CheckReferencePhases(ReferenceList *refs) { + if (refs == nullptr) + return; + for (auto *ref : *refs) { + // Functional unit refs don't always have an explicit phase expression. + // We need to add one, but must do it later when we instantiate it. + if (ref->IsFuncUnitRef() && ref->phase_expr() == nullptr) + continue; + // For conditional refs, recur on the then/else clauses. + if (ref->IsConditionalRef()) { + for (auto *cc = ref->conditional_ref(); cc; cc = cc->else_clause()) + CheckReferencePhases(&cc->refs()); + continue; + } + // Normal case - make sure there's a phase name mentioned somewhere. + if (!ref->phase_expr()->HasPhaseName()) + ErrorLog(ref->phase_expr(), "Invalid phase: missing phase name"); + } +} + +//--------------------------------------------------------------------------- +// Look up references to pipeline phases. +// These occur in resource definitions, issue definitions, and references. +// Any errors found here aren't immediately fatal, so we always return. +//--------------------------------------------------------------------------- +void MdlSpec::CheckPipeReferences() { + // Check resources defined globally. 
+  for (auto *res : resources())
+    CheckPipeReference(res, nullptr);
+
+  // Check resources defined in CPUs and CPU clusters.
+  for (auto *cpu : cpus()) {
+    for (auto *issue : *cpu->issues())
+      CheckPipeReference(issue, cpu);
+    for (auto *res : *cpu->resources())
+      CheckPipeReference(res, cpu);
+    for (auto *cluster : *cpu->clusters()) {
+      for (auto *issue : *cluster->issues())
+        CheckPipeReference(issue, cpu);
+      for (auto *res : *cluster->resources())
+        CheckPipeReference(res, cpu);
+    }
+  }
+
+  // We can't statically check phase references in latency template definitions.
+  // Phase names are defined on a per-cpu basis, so we need to check
+  // templates' phase references when we instantiate the template. This happens
+  // later in the compilation process.
+
+  // That said, we -can- check that phase expressions contain at least ONE
+  // phase name.  We do this to disallow references to phase indexes.
+  for (const auto *latency : latencies())
+    CheckReferencePhases(latency->references());
+}
+
+//---------------------------------------------------------------------------
+// Make sure functional unit, subunit, and latency templates are declared
+// with parameters that are compatible with their bases (types and numbers).
+// Errors are logged for any mismatch.
+//---------------------------------------------------------------------------
+// NOTE: The strictest possible policy would be that the number, order,
+// name, and type of parameters must be identical.  This could be
+// relaxed in the future, with several possible policies:
+//  - extra parameters in parent template are ok (supported).
+//  - missing base parameters are passed null (not supported).
+//  - order of parameters doesn't matter, just match on name (not supported).
+//---------------------------------------------------------------------------
+void MdlSpec::SameParams(const ParamsList *params,
+                         const ParamsList *base_params, MdlItem *item) {
+  const bool base_can_have_extra_params = false; // not supported downstream.
+  const bool base_can_have_fewer_params = true;
+
+  // Check that the number of parameters is compatible.
+  if ((!base_can_have_extra_params && (params->size() < base_params->size())) ||
+      (!base_can_have_fewer_params && (params->size() > base_params->size()))) {
+    ErrorLog(item, "Incompatible parameters for template and base");
+    return;
+  }
+
+  int min_params = std::min(params->size(), base_params->size());
+
+  for (int idx = 0; idx < min_params; idx++) {
+    if ((*params)[idx]->type() != (*base_params)[idx]->type())
+      ErrorLog(item, "Unmatched parameter types for template and base");
+    if ((*params)[idx]->name() != (*base_params)[idx]->name())
+      ErrorLog(item, "Unmatched parameter names for template and base");
+  }
+}
+
+//---------------------------------------------------------------------------
+// Some helper template functions for finding cycles in MDL template
+// definitions.  Recursive templates are not allowed.
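+// FindCycle() is a depth-first search over template bases: 'visited' records
+// every unit reached so far, and 'recur' holds the units on the current
+// derivation path.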
+//---------------------------------------------------------------------------
+template <typename T>
+bool FindCycle(T *unit, std::set<std::string> &visited,
+               std::set<std::string> &recur, MdlSpec *md, std::string &type) {
+  visited.insert(unit->name());
+  recur.insert(unit->name());
+
+  for (auto *child : unit->unit_bases()) {
+    if (visited.count(child->name()) == 0 &&
+        FindCycle(child, visited, recur, md, type))
+      return true;
+    if (recur.count(child->name())) {
+      md->ErrorLog(child, "Recursively defined {0} template: {1}", type,
+                   child->name());
+      return true;
+    }
+  }
+  recur.erase(unit->name());
+  return false;
+}
+
+template <typename T>
+void FindCycles(T &item, MdlSpec *md, std::string type) {
+  std::set<std::string> visited, recur;
+  for (auto *unit : item)
+    FindCycle(unit, visited, recur, md, type);
+}
+
+//---------------------------------------------------------------------------
+// Some helper template functions for finding duplicate bases in MDL template
+// definitions.  Duplicate bases in general are not allowed.  This is a
+// pretty naive implementation - derived units aren't so common that this
+// needs to be particularly efficient.
+//---------------------------------------------------------------------------
+template <typename T>
+void FindAllBases(T *top_unit, T *unit, MdlSpec *md, std::string &type,
+                  std::set<std::string> &seen) {
+  for (auto *base : unit->unit_bases()) {
+    if (seen.count(base->name()))
+      md->ErrorLog(top_unit, "{0} template {1} has duplicate bases: {2}", type,
+                   top_unit->name(), base->name());
+    seen.insert(base->name());
+    FindAllBases(top_unit, base, md, type, seen);
+  }
+}
+
+// Ensure that a template doesn't have duplicate bases.  Note that this
+// assumes we've already checked for recursively defined templates.
+template <typename T>
+void FindDuplicateBases(T &item, MdlSpec *md, std::string type) {
+  for (auto *unit : item) {
+    std::set<std::string> seen;
+    FindAllBases(unit, unit, md, type, seen);
+  }
+}
+
+//---------------------------------------------------------------------------
+// Check validity of template bases for functional units, subunits, latencies
+// and functional unit groups, and link the template to its bases.
+// Functional unit and latency template bases must have parameters which are
+// compatible with the base.  We also explicitly check for recursively
+// defined templates.  Any errors found here are considered fatal, so just abort.
+//---------------------------------------------------------------------------
+void MdlSpec::CheckTemplateBases() {
+  for (auto fu : func_units())
+    if (auto *bases = fu->bases()) {
+      for (auto *base : *bases) {
+        if (auto *fu_base = FindItem(fu_map(), base->name())) {
+          fu->add_base(fu_base);
+          SameParams(fu->params(), fu_base->params(), fu);
+        } else {
+          ErrorLog(fu, "Undefined functional unit base: {0}", base->name());
+        }
+      }
+    }
+
+  // Check that subunit bases exist, and link the template to its bases. If
+  // there are any string bases, use these to tie the subunit to a matched set
+  // of instructions.
+  // Unlike other template bases, subunit templates can have different
+  // parameters than their bases (by design), so we don't check for parameter
+  // compatibility.
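+  // Resolve each named subunit base, link the base and the derived subunit to
+  // each other, then tie any regex bases to the instructions they match.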
+  for (auto *su : subunits()) {
+    if (auto *bases = su->bases()) {
+      for (auto *base : *bases) {
+        if (auto *su_base = FindItem(su_map(), base->name())) {
+          su->add_base(su_base);
+          su_base->add_derived_subunit(su);
+        } else {
+          ErrorLog(su, "Undefined subunit base: {0}", base->name());
+        }
+      }
+    }
+    TieSubUnitToInstructions(su, su->regex_bases());
+  }
+
+  for (auto *latency : latencies())
+    if (auto *bases = latency->base_ids()) {
+      for (auto *base : *bases)
+        if (auto *lat_base = FindItem(lat_map(), base->name())) {
+          latency->add_base(lat_base);
+          SameParams(latency->params(), lat_base->params(), latency);
+        } else {
+          ErrorLog(latency, "Undefined latency base: {0}", base->name());
+        }
+    }
+
+  // If a functional unit group includes an FU group, expand that group into
+  // the parent group.
+  for (auto *group : func_unit_groups())
+    if (!ExpandGroup(group, group->members(), 0))
+      break;
+
+  // Check that we don't have recursive derivations for templates.
+  FindCycles(func_units(), this, "functional unit");
+  FindCycles(subunits(), this, "subunit");
+  FindCycles(latencies(), this, "latency");
+
+  if (ErrorsSeen())
+    Abort();
+
+  // Check for duplicate bases.  Functional units *can* have duplicate bases,
+  // and duplicate bases for latencies are relatively common and harmless.
+  // But subunits cannot have duplicate bases.
+  FindDuplicateBases(subunits(), this, "Subunit");
+
+  if (ErrorsSeen())
+    Abort(); // If any errors found, abort.
+}
+
+//---------------------------------------------------------------------------
+// Link all the members of a functional unit group to the group.  Subgroups
+// are allowed, but we need to check for recursion.
+// This function returns true if no errors were found, else returns false.
+//---------------------------------------------------------------------------
+bool MdlSpec::ExpandGroup(FuncUnitGroup *group, IdList *members,
+                          unsigned depth) {
+  if (depth >= fu_group_map().size()) {
+    ErrorLog(group, "Group is recursively defined: {0}", group->name());
+    return false;
+  }
+
+  for (auto *member : *members) {
+    if (auto *func_unit = FindItem(fu_map(), member->name())) {
+      group->add_unit(func_unit);
+      continue;
+    }
+    if (auto *subgroup = FindItem(fu_group_map(), member->name()))
+      if (!ExpandGroup(group, subgroup->members(), depth + 1))
+        return false;
+  }
+  return true;
+}
+
+//---------------------------------------------------------------------------
+// Check for lexical argument compatibility between an instantiation and
+// a template definition (for functional units, subunits, and latencies).
+// Explicitly link argument objects to their associated template parameters.
+// Log an error if the argument count doesn't match the parameter count.
+//---------------------------------------------------------------------------
+void MdlSpec::ValidateArgs(const ParamsList *params,
+                           const ResourceRefList *instance, MdlItem *item) {
+  if (params->size() != instance->size()) {
+    ErrorLog(item, "Instance has wrong number of parameters");
+    return;
+  }
+  for (unsigned idx = 0; idx < params->size(); idx++)
+    (*instance)[idx]->set_parameter((*params)[idx]);
+}
+
+//---------------------------------------------------------------------------
+// For each functional unit, subunit, and latency instantiation, find the
+// referenced template definition (if it exists) and make sure the parameters
+// match.  Link instances to their templates, and link instance arguments to
+// the associated template parameter.
+// Any errors found here are considered fatal, so just abort.
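+// Arguments are matched to parameters positionally: the N'th argument of an
+// instance is linked to the N'th parameter of its template.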
+//--------------------------------------------------------------------------- +void MdlSpec::CheckInstantiations() { + // For every CPU and cluster, find each functional unit instantiation and + // check its parameters against its functional unit template definition. + for (auto *cpu : cpus()) { + for (auto *cluster : *cpu->clusters()) { + for (auto *fu_inst : *cluster->func_units()) { + if (auto *fu_temp = FindItem(fu_map(), fu_inst->type()->name())) { + ValidateArgs(fu_temp->params(), fu_inst->args(), fu_inst); + fu_inst->set_template(fu_temp); + } else { + ErrorLog(fu_inst, "Undefined functional unit reference: {0}", + fu_inst->type()->name()); + } + } + } + } + + // For every functional unit template definition, find each subunit + // instantiation and check its parameters against its subunit template + // definition. + for (auto *fu_template : func_units()) { + for (auto *instance : *fu_template->subunits()) { + if (auto *su_temp = FindItem(su_map(), instance->name())) { + ValidateArgs(su_temp->params(), instance->args(), instance); + instance->set_template(su_temp); + } else { + ErrorLog(instance, "Undefined subunit reference: {0}", + instance->name()); + } + } + } + + // For every subunit template base definition, find each latency instantiation + // and check its parameters against its latency template definition. + for (auto *su_template : subunits()) { + for (auto *lat_inst : *su_template->latencies()) { + if (auto *lat_temp = FindItem(lat_map(), lat_inst->name())) { + ValidateArgs(lat_temp->params(), lat_inst->args(), lat_inst); + lat_inst->set_template(lat_temp); + } else { + ErrorLog(lat_inst, "Undefined latency reference: {0}", + lat_inst->name()); + } + } + } + + if (ErrorsSeen()) + Abort(); // If any errors found, abort. +} + +// For each CPU, determine if we need to explicitly manage issue slots. +// - If there's more than one cluster, we conservatively decide to manage them. +// - If any functional unit instance pins issue slots, we must manage them. +void MdlSpec::CheckIssueSlots() { + for (auto *cpu : cpus()) { + if (cpu->clusters()->size() > 1) { + cpu->set_needs_slot_resources(true); + continue; + } + for (auto *cluster : *cpu->clusters()) + for (auto *fu_inst : *cluster->func_units()) + if (fu_inst->pin_any() || fu_inst->pin_all()) { + cpu->set_needs_slot_resources(true); + break; + } + } +} + +//--------------------------------------------------------------------------- +// Check instruction definitions for valid subunits. +// Any errors found here are considered fatal, so just abort. +//--------------------------------------------------------------------------- +void MdlSpec::CheckInstructions() { + for (auto *instruct : instructions()) + for (auto *subunit : *instruct->subunits()) + if (!su_map().count(subunit->name())) + ErrorLog(subunit, "Undefined subunit reference: {0}", subunit->name()); +} + +//--------------------------------------------------------------------------- +// Flatten a single operand - push an operand for each component onto the +// operand list. +//--------------------------------------------------------------------------- +void MdlSpec::FlattenOperand(OperandDecl *opnd, OperandDeclList *flat_ops) { + // If this is not a reference to another operand, just add it to the list. + if (opnd->is_implied_register() || opnd->reg_class() || + opnd->operand()->operands()->empty()) { + flat_ops->push_back(opnd); + return; + } + + // Recursively handle operands that reference other operands. 
+  for (auto *sub_opnd : *opnd->operand()->operands()) {
+    auto *new_sub_opnd = new OperandDecl(sub_opnd, opnd);
+    new_sub_opnd->add_type(sub_opnd->type());
+    new_sub_opnd->add_name(sub_opnd->op_name());
+    FlattenOperand(new_sub_opnd, flat_ops);
+  }
+}
+
+//---------------------------------------------------------------------------
+// Create an operand list that flattens the operand declarations:
+//     operand opnd(GPR reg, imm value);
+//     instruction inst(opnd X)
+// becomes:
+//     instruction inst(opnd.GPR X.reg, opnd.imm X.value);
+// Note: Since we've already checked the validity of the operands, there
+// will not be any errors encountered here.
+//---------------------------------------------------------------------------
+void MdlSpec::FlattenInstructionOperands() {
+  for (auto *instruct : instructions())
+    if (instruct->operands()) {
+      auto *flat_ops = new OperandDeclList();
+      for (auto *opnd : *instruct->operands())
+        FlattenOperand(new OperandDecl(opnd, opnd), flat_ops);
+
+      instruct->set_flat_operands(flat_ops);
+    }
+}
+
+// Determine if a derived operand definition is based on the specified operand.
+// Return true if it is.
+bool MdlSpec::FindOperandDerivation(const OperandDef *derived,
+                                    const OperandDef *operand) const {
+  if (derived == operand)
+    return true;
+
+  for (auto *base : *derived->base_operands())
+    if (FindOperandDerivation(base, operand))
+      return true;
+  return false;
+}
+
+// Check a qualified operand name list against an instruction's flattened
+// operand list.
+bool MdlSpec::CompareOpndNames(const OperandDecl *opnd, const IdList &names) {
+  int opnd_size = opnd->op_names()->size();
+  int names_size = names.size();
+
+  // If the operand reference isn't fully qualified, we allow you to skip
+  // the last name if the underlying operand type is a register or register
+  // class.  If you don't like this behavior, provide all the names!
+  if (opnd_size != names_size) {
+    if (opnd_size != names_size + 1)
+      return false;
+
+    // Make sure the missing operand type is a register or register class.
+    std::string opnd_type = opnd->types()->back()->name();
+    if (!FindItem(registers(), opnd_type) && !reg_class_map().count(opnd_type))
+      return false;
+  }
+
+  // All the leading names need to match.
+  for (int index = 0; index < names_size; index++)
+    if ((*opnd->op_names())[index]->name() != names[index]->name())
+      return false;
+  return true;
+}
+
+// Look up a qualified operand name in the instruction's flattened operand
+// list, and return its index in the list, or -1 if not found.
+// Implied operands may show up several times in the operand list, and
+// we need to differentiate defs from uses.
+int MdlSpec::FindOperandName(const InstructionDef *instruct,
+                             const IdList &names, RefType type) {
+  int index = 0;
+  int itype = static_cast<int>(type);
+  for (auto *op_decl : *instruct->flat_operands()) {
+    // Check for references to implied register operands.
+    if (op_decl->is_implied_register() && (itype & RefTypes::kAnyUseDef)) {
+      if (names[0]->name() == op_decl->type()->name()) {
+        if (op_decl->is_input() && (itype & RefTypes::kAnyUse))
+          return index;
+        if (op_decl->is_output() && (itype & RefTypes::kAnyDef))
+          return index;
+      }
+      continue;
+    }
+    // Handle "normal" operands.
+    if (CompareOpndNames(op_decl, names))
+      return index;
+    index++;
+  }
+  return -1;
+}
+
+// Look up an operand by name and optional type, and return the index or
+// -1 if not found.  If a non-empty type is provided, it must match the
+// operand definition type.
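+// A reference whose type is derived from the declared operand type also
+// matches, as does a register-class type that is a superset of the declared
+// register class.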
+int MdlSpec::FindOperand(const InstructionDef *instr, const IdList &name, + const std::string &type, RefType ref_type) { + // First check to see if it's a variant operand ($$). Note that these + // never have a declared operand type. + if (name[0]->is_vararg()) + return name[0]->vararg_index() + instr->num_flat_operands() - 1; + + // If the operand is simply an operand index ($) use that as the + // operand id, otherwise look up the operand name(s). + int opnd_id; + if (name[0]->is_number()) { + opnd_id = name[0]->get_number(); + if (opnd_id >= instr->num_flat_operands()) + return -1; + } else { + opnd_id = FindOperandName(instr, name, ref_type); + } + if (opnd_id == -1) + return -1; + + // See if the operand types match. If either is empty, we match. + auto opnd_decl = instr->GetOperandDecl(opnd_id); + if (opnd_decl == nullptr) + return opnd_id; + + std::string decl_type = opnd_decl->type_name(); + if (type.empty() || decl_type.empty() || decl_type == type) + return opnd_id; + + // If the operand match type is a derived operand, check if its derived + // from the declared operand type. + if (!type.empty() && !decl_type.empty()) { + auto *ins_opnd_type = FindItem(operands(), decl_type); + auto *ref_opnd_type = FindItem(operands(), type); + if (ins_opnd_type && ref_opnd_type && ref_opnd_type->IsDerivedOperand()) + if (FindOperandDerivation(ref_opnd_type, ins_opnd_type)) + return opnd_id; + } + + // If the operand names don't match, and they are both register classes, + // and if the reference class is a strict superset of the declared class, + // it's a match. + auto *decl_class = FindItem(reg_classes(), decl_type); + auto *ref_class = FindItem(reg_classes(), type); + if (decl_class && ref_class && ref_class->IsSupersetOf(decl_class)) + return opnd_id; + return -1; // Not found. +} + +// Given an operand reference, determine its index in this instruction. +// Return the index, or -1 if not found. +int MdlSpec::GetOperandIndex(const InstructionDef *instr, + const OperandRef *operand, RefType ref_type) { + if (operand == nullptr) + return -1; + std::string type = operand->op_type() ? operand->op_type()->name() : ""; + return FindOperand(instr, *operand->op_names(), type, ref_type); +} + +//--------------------------------------------------------------------------- +// Make sure we don't have recursively defined operands. +// This can happen with suboperands or operand bases. +// Return true if recursion found. +//--------------------------------------------------------------------------- +bool MdlSpec::CheckRecursiveOperands(OperandDef *opnd, OperandDefList &seen) { + seen.push_back(opnd); + + // Check suboperands. + if (auto *sub_operands = opnd->operands()) { + for (auto *opnd_decl : *sub_operands) { + if (opnd_decl->operand()) { + if (FindItem(seen, opnd_decl->operand()->name())) { + ErrorLog(seen[0], "Recursively defined operand: {0}", + seen[0]->name()); + return true; + } + if (CheckRecursiveOperands(opnd_decl->operand(), seen)) + return true; + } + } + } + + // Check base operands. + for (auto *base : *opnd->base_operands()) { + if (FindItem(seen, base->name())) { + ErrorLog(seen[0], "Recursively defined operand: {0}", seen[0]->name()); + return true; + } + if (CheckRecursiveOperands(base, seen)) + return true; + } + + seen.pop_back(); + return false; +} + +// Define a set to keep track of operand definitions we've already seen. 
+using OperandDefSet = std::unordered_set<OperandDef *>;
+
+//---------------------------------------------------------------------------
+// Check that derived operands have only one derivation to any base operand.
+// If there is more than one derivation (i.e., a diamond pattern) the
+// derivation is ambiguous, and we can't always generate meaningful code for
+// it.  Return the base operand reached by more than one path if the
+// derivation is ambiguous, otherwise return nullptr.
+//---------------------------------------------------------------------------
+static OperandDef *CheckOperandDerivation(OperandDef *opnd,
+                                          OperandDefSet &seen) {
+  // If we've already seen this operand, it's either recursive (already
+  // checked) or reachable by more than one path.  Recursion was reported
+  // earlier, so we don't report it again; a base operand reached twice is
+  // an ambiguous derivation.
+  if (seen.count(opnd))
+    return (opnd->IsDerivedOperand()) ? nullptr : opnd;
+
+  seen.insert(opnd);
+  for (auto *base : *opnd->base_operands())
+    if (auto *item = CheckOperandDerivation(base, seen))
+      return item;
+
+  if (opnd->IsDerivedOperand())
+    seen.erase(opnd);
+  return nullptr;
+}
+
+//---------------------------------------------------------------------------
+// Check that derived operands have only one derivation to any base operand.
+//---------------------------------------------------------------------------
+void MdlSpec::CheckOperandDerivations(OperandDef *opnd) {
+  OperandDefSet seen;
+  if (auto *base = CheckOperandDerivation(opnd, seen))
+    ErrorLog(opnd, "Ambiguous operand derivation: {0}->{1}", opnd->name(),
+             base->name());
+}
+
+//---------------------------------------------------------------------------
+// Check that a single operand reference is either a reference to a defined
+// operand, a register class, or a register name (an implied operand).
+// Link valid declarations with their definitions.
+// NOTE: We currently don't allow instruction definitions or operands to
+// directly reference derived operands, so we explicitly check for this here.
+// Derived operands exist to qualify regular operand types in reference rules.
+//---------------------------------------------------------------------------
+void MdlSpec::CheckOperand(OperandDecl *operand_decl) {
+  const std::string &name = operand_decl->type()->name();
+  if (operand_map().count(name)) {
+    operand_decl->set_operand(operand_map()[name]);
+    if (operand_decl->operand()->bases())
+      ErrorLog(operand_decl, "Invalid use of a derived operand: {0}", name);
+  } else if (reg_class_map().count(name)) {
+    operand_decl->set_regclass(reg_class_map()[name]);
+  } else if (FindItem(registers(), name) != nullptr) {
+    operand_decl->set_is_implied_register();
+  } else {
+    ErrorLog(operand_decl, "Undefined operand type: {0}", name);
+  }
+
+  if (!operand_decl->is_implied_register() && !operand_decl->is_ellipsis())
+    if (operand_decl->name().empty())
+      ErrorLog(operand_decl, "Instruction operands must have names");
+}
+
+//---------------------------------------------------------------------------
+// Check that operand references (in instruction definitions, operand
+// definitions, and latency references) refer to valid operands.
+// Link operand declarations to their definitions.  Also link derived
+// operands to their base operands.
+// Check for recursively defined operands, or ambiguously derived operands.
+//---------------------------------------------------------------------------
+void MdlSpec::CheckOperands() {
+  // Check instruction definitions for valid operand types.
They can + // be either operand definitions or register class definitions. + for (auto *instruct : instructions()) + for (auto *operand : *instruct->operands()) + CheckOperand(operand); + + // Check operand definitions for valid operand types, and link declarations + // to their definitions. If an operand is derived, link it to its base. + for (auto *operand_def : operands()) { + for (auto *operand : *operand_def->operands()) + CheckOperand(operand); + if (auto *base_list = operand_def->bases()) { + for (auto *base : *base_list) + if (operand_map().count(base->name())) + operand_def->add_base_operand(operand_map()[base->name()]); + else + ErrorLog(base, "Undefined base operand: {0}", base->name()); + } + } + if (ErrorsSeen()) + Abort(); // If any errors found, abort. + + // Check for recursively defined operands. + for (auto *opnd_def : operands()) { + OperandDefList seen; + CheckRecursiveOperands(opnd_def, seen); + } + + // Check for valid derivations for derived operands. + for (auto *opnd_def : operands()) + if (!opnd_def->base_operands()->empty()) { + CheckOperandDerivations(opnd_def); + } + if (ErrorsSeen()) + Abort(); // If any errors found, abort. + + // Once we've checked all the operands, flatten the operand hierarchy to a + // single level. + FlattenInstructionOperands(); +} + +//--------------------------------------------------------------------------- +// Scan references in each latency rule and report references which were +// encountered, but never valid (in any instruction, in any subunit). +//--------------------------------------------------------------------------- +void MdlSpec::CheckReferenceUse() { + for (auto *latency : latencies()) + for (auto *ref : *latency->references()) + if (ref->seen() && !ref->used()) + WarningLog(ref, "Reference never used: {0}", ref->ToString()); +} + +//--------------------------------------------------------------------------- +// Print a warning for any subunit template that isn't used. +//--------------------------------------------------------------------------- +void MdlSpec::CheckSubunitUse() { + for (auto *subunit : subunits()) + if (subunit->use_count() == 0) + WarningLog(subunit, "Subunit never used: {0}", subunit->name()); +} + +//--------------------------------------------------------------------------- +// Check a single resource definition for a pooled resource. +// Pooled resources with shared bits must specify a phase - there's no +// reasonable way to manage these across arbitrary pipeline phases. +//--------------------------------------------------------------------------- +void MdlSpec::CheckResourceDef(const ResourceDef *def) { + if (def->IsPoolDef() && def->start_phase() == nullptr && + def->has_shared_bits()) + ErrorLog(def, "Shared resource pools must have a pipeline phase: {0}", + def->ToString()); +} + +//--------------------------------------------------------------------------- +// Make sure shared resource pools have been declared with a pipe phase. +// Do this before functional unit instantiation and global/group resource +// promotion, so that we don't get duplicate error messages. +//--------------------------------------------------------------------------- +void MdlSpec::CheckResourceDefs() { + // Check resources defined globally. + for (auto def : resources()) + CheckResourceDef(def); + + // Check resources defined in functional units. + for (auto *funit : func_units()) + for (auto def : *funit->resources()) + CheckResourceDef(def); + + // Check resources defined in cpus (and clusters). 
+ FindDuplicates(cpus()); + for (auto *cpu : cpus()) { + for (auto def : *cpu->resources()) + CheckResourceDef(def); + for (auto *cluster : *cpu->clusters()) + for (auto def : *cluster->resources()) + CheckResourceDef(def); + } +} + +//--------------------------------------------------------------------------- +// Print a warning for any inconsistent resource use. +//--------------------------------------------------------------------------- +void MdlSpec::CheckResourceUse() { + for (auto *cpu : cpus()) + for (auto *res : cpu->all_resources()) + if (res != cpu->all_resources().back()) { + if (!res->is_used()) + WarningLog(res, "Resource never referenced: {0}", res->debug_name()); + else if (res->only_held()) + WarningLog(res, "Resource Held but never Reserved: {0}", res->name()); + else if (res->only_reserved()) + WarningLog(res, "Resource Reserved but never Held: {0}", res->name()); + } +} + +//--------------------------------------------------------------------------- +// Check that conditional references have a valid predicate, and also check +// the predicated references for validity. +//--------------------------------------------------------------------------- +void MdlSpec::CheckConditionalReferences(ConditionalRef *cond_ref) { + if (cond_ref == nullptr) + return; + if (cond_ref->predicate() != nullptr) + if (predicate_table_.count(cond_ref->predicate()->name()) == 0) + ErrorLog(cond_ref->predicate(), "Undefined predicate name: {0}", + cond_ref->predicate()->name()); + + for (auto *ref : cond_ref->refs()) { + if (ref->operand() && ref->operand()->op_type() != nullptr) + CheckSubOperands(ref->operand(), ref->operand()->op_type(), 1); + if (ref->IsConditionalRef()) + CheckConditionalReferences(ref->conditional_ref()); + } + CheckConditionalReferences(cond_ref->else_clause()); +} + +//--------------------------------------------------------------------------- +// Check operand references in each rule. Note that, at this point in the +// compilation, we can only check references which explicitly specify an +// operand type. We look for references that will -always- fail for any +// instruction. +//--------------------------------------------------------------------------- +void MdlSpec::CheckReferences() { + for (auto *latency : latencies()) + for (auto *ref : *latency->references()) { + if (ref->operand() && ref->operand()->op_type() != nullptr) + CheckSubOperands(ref->operand(), ref->operand()->op_type(), 1); + if (ref->IsConditionalRef()) + CheckConditionalReferences(ref->conditional_ref()); + } +} + +//--------------------------------------------------------------------------- +// Promote globally defined resources to be CPU-defined resources. This +// gives each CPU a unique set of resources, so we can name them, renumber +// them, and track use of them separately for each CPU. +//--------------------------------------------------------------------------- +void MdlSpec::PromoteGlobalResources() { + for (auto *cpu : cpus()) + for (auto *resource : resources()) + cpu->resources()->push_back(new ResourceDef(*resource)); +} + +// If we promoted a member and the promoted resource already exists, check that +// they have compatible definitions. +void MdlSpec::CheckPromotedMember(ResourceDef *group, Identifier *member, + ResourceDef *promoted) { + // The promoted resource cannot be part of a group. + if (promoted->IsPoolDef()) + ErrorLog(member, "Invalid group member: {0}", member->name()); + // The group and the promoted resource must have the same attributes. 
+ if (group->bit_size() != promoted->bit_size() || + group->start_phase() != promoted->start_phase() || + group->end_phase() != promoted->end_phase()) + ErrorLog(member, "Inconsistent group definition: {0}", member->name()); +} + +//--------------------------------------------------------------------------- +// Given a list of resource definitions for a scope, find group definitions +// and promote each member to a regular resource definition, and annotate +// the resource group with the new resource definition. If the resource is +// already defined (either by the user or a previous promotion), make sure +// the definitions match. +//--------------------------------------------------------------------------- +void MdlSpec::PromoteResourceGroupMembers(ResourceDefList *resources, + ResourceDefList *outer_scope, + ResourceRefDict *args) { + ResourceDefList promos; + for (auto *resource : *resources) { + if (resource->IsGroupDef()) { + for (unsigned idx = 0; idx < resource->members().size(); idx++) { + auto *mem = resource->members()[idx]; + // See if this member is defined or has been previously promoted. + ResourceDef *def = FindItem(*resources, mem->name()); + if (def == nullptr) + def = FindItem(promos, mem->name()); + if (def == nullptr && outer_scope != nullptr) + def = FindItem(*outer_scope, mem->name()); + if (def == nullptr && args != nullptr) { + if (auto *ref = FindItem(*args, mem->name())) { + def = ref->definition(); + resource->members()[idx] = new Identifier(def->id(), mem->index()); + } + } + + // If we didn't find the resource, create a new resource and add it to + // the list of things to promote. + if (def == nullptr) { + def = new ResourceDef(*mem, mem, resource->bit_size(), 0, + resource->start_phase(), resource->end_phase()); + promos.push_back(def); + } + // Add the promoted resource to the def list for the group. + CheckPromotedMember(resource, mem, def); + resource->add_member_def(def); + } + // After promoting all the members of a group, check that we didn't end + // up with duplicate members in the group. + FindDuplicates(resource->members()); + } + } + + // Add all the new resources to the defined resources. + resources->insert(resources->end(), promos.begin(), promos.end()); +} + +//--------------------------------------------------------------------------- +// Scan arguments to functional unit instances, and promote implicit group +// definitions to cluster resources. +//--------------------------------------------------------------------------- +void PromoteFuncUnitGroupArgs(ClusterInstance *cluster) { + for (auto *instance : *cluster->func_units()) { + for (auto *arg : *instance->args()) { + if (arg->IsGroupRef() && arg->implicit_group()) + cluster->resources()->push_back(arg->definition()); + } + } +} + +//--------------------------------------------------------------------------- +// Promote group member definitions to regular resources for CPUs, Clusters. +// We promote functional unit templates' resources separately for each +// instance. +//--------------------------------------------------------------------------- +void MdlSpec::PromoteResourceGroups() { + for (auto *cpu : cpus()) { + PromoteResourceGroupMembers(cpu->resources(), nullptr, nullptr); + for (auto *clus : *cpu->clusters()) { + PromoteFuncUnitGroupArgs(clus); + PromoteResourceGroupMembers(clus->resources(), cpu->resources(), nullptr); + } + } +} + +//--------------------------------------------------------------------------- +// Return true if this is a valid operand reference. 
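+// A reference must name sub-operands level by level until it reaches a
+// register class or a leaf operand: stopping early is under-qualified, and
+// naming past a leaf is over-qualified.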
+//---------------------------------------------------------------------------
+bool MdlSpec::CheckSubOperands(OperandRef *ref, const Identifier *opnd,
+                               int idx) {
+  int size = ref->op_names()->size();
+  bool is_reg_class = reg_class_map().count(opnd->name());
+  bool is_operand = operand_map().count(opnd->name());
+
+  if (is_reg_class)
+    ref->set_regclass(reg_class_map()[opnd->name()]);
+  if (is_operand)
+    ref->set_operand(operand_map()[opnd->name()]);
+
+  if (is_reg_class && idx == size)
+    return true;
+
+  if (is_operand && idx < size) {
+    OperandDef *op_type = operand_map()[opnd->name()];
+    if (!op_type->operands()->empty()) {
+      auto *item =
+          FindItem(*op_type->operands(), (*ref->op_names())[idx]->name());
+      if (item != nullptr)
+        return CheckSubOperands(ref, item->type(), idx + 1);
+      opnd = nullptr; // Force an error message.
+    }
+  }
+
+  if (opnd == nullptr || (!is_reg_class && !is_operand)) {
+    ErrorLog(ref, "Undefined operand type: {0}", ref->ToString());
+    return false;
+  }
+
+  if (idx < size) {
+    ErrorLog(ref, "Over-qualified operand reference: {0}", ref->ToString());
+    return false;
+  }
+
+  if (is_operand && !operand_map()[opnd->name()]->operands()->empty()) {
+    ErrorLog(ref, "Under-qualified operand reference: {0}", ref->ToString());
+    return false;
+  }
+
+  return true;
+}
+
+} // namespace mdl
+} // namespace mpact
diff --git a/llvm/utils/MdlCompiler/mdl_visitor.h b/llvm/utils/MdlCompiler/mdl_visitor.h
new file mode 100644
--- /dev/null
+++ b/llvm/utils/MdlCompiler/mdl_visitor.h
@@ -0,0 +1,253 @@
+//===- mdl_visitor.h - Definitions for the ANTLR parse tree ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Definitions for processing the MDL language Antlr parse tree.
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef MDL_COMPILER_MDL_VISITOR_H_
+#define MDL_COMPILER_MDL_VISITOR_H_
+
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "antlr4-runtime.h"
+#include "mdl.h"
+#include "mdlLexer.h"
+#include "mdlParser.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
+
+namespace mpact {
+namespace mdl {
+
+using mpact::mdl::generated::mdlLexer;
+using mpact::mdl::generated::mdlParser;
+
+//----------------------------------------------------------------------------
+// Define an error listener class to capture syntax errors.
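+// The listener counts syntax errors so the driver can stop before visiting
+// a broken parse tree.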
+//---------------------------------------------------------------------------- +class MdlErrorListener : public antlr4::BaseErrorListener { +public: + explicit MdlErrorListener(const std::string &file_name) + : syntax_error_count_(0), file_name_(file_name) {} + + void syntaxError(antlr4::Recognizer *recognizer, + antlr4::Token *offendingSymbol, size_t line, + size_t charPositionInLine, const std::string &msg, + std::exception_ptr e) override { + llvm::errs() << std::string(llvm::formatv("{0}:{1}:{2}:{3}\n", file_name_, + line, charPositionInLine, msg)); + syntax_error_count_++; + } + + int syntax_error_count() const { return syntax_error_count_; } + + const std::string &file_name() const { return file_name_; } + void set_file_name(std::string file_name) { + file_name_ = std::move(file_name); + } + +private: + int syntax_error_count_ = 0; + std::string file_name_; // name of current input file +}; + +//---------------------------------------------------------------------------- +// Define a visitor class using the antlr base visitor class. +// This is the top-level interface to the Antlr lexer/parser. +//---------------------------------------------------------------------------- +class MdlVisitor { +public: + explicit MdlVisitor(MdlSpec &spec, const std::string &import_path) + : spec_(spec), import_path_(import_path) {} + + // Perform the lexing and parsing of the input stream. A representation of + // the input is returned in spec. Return false if syntax errors found. + bool ProcessInputFile(std::string file_name); + + // Type aliases for antlr4 context types. + using ArchitectureSpecCtx = mdlParser::Architecture_specContext; + using FamilyCtx = mdlParser::Family_nameContext; + using CpuDefCtx = mdlParser::Cpu_defContext; + using ClusterInstantiationCtx = mdlParser::Cluster_instantiationContext; + using FuncUnitInstantiationCtx = mdlParser::Func_unit_instantiationContext; + using FuncUnitInstanceCtx = mdlParser::Func_unit_instanceContext; + using FuncUnitBasesCtx = mdlParser::Func_unit_basesContext; + using PinAllCtx = mdlParser::Pin_allContext; + using PinAnyCtx = mdlParser::Pin_anyContext; + using ForwardStmtCtx = mdlParser::Forward_stmtContext; + using ForwardToUnitCtx = mdlParser::Forward_to_unitContext; + using FuncUnitTemplateCtx = mdlParser::Func_unit_templateContext; + using FuncUnitParamsCtx = mdlParser::Func_unit_paramsContext; + using FuncUnitGroupCtx = mdlParser::Func_unit_groupContext; + using FuDeclItemCtx = mdlParser::Fu_decl_itemContext; + using PortDefCtx = mdlParser::Port_defContext; + using PortDeclCtx = mdlParser::Port_declContext; + using ConnectStmtCtx = mdlParser::Connect_stmtContext; + using SubunitInstantiationCtx = mdlParser::Subunit_instantiationContext; + using SubunitStatementCtx = mdlParser::Subunit_statementContext; + using SubunitInstanceCtx = mdlParser::Subunit_instanceContext; + + using SubunitTemplateCtx = mdlParser::Subunit_templateContext; + using SuDeclItemsCtx = mdlParser::Su_decl_itemsContext; + using SuDeclItemCtx = mdlParser::Su_decl_itemContext; + using SubunitBodyCtx = mdlParser::Subunit_bodyContext; + using LatencyInstanceCtx = mdlParser::Latency_instanceContext; + using LatencyStatementCtx = mdlParser::Latency_statementContext; + + using LatencyTemplateCtx = mdlParser::Latency_templateContext; + using LatencyItemsCtx = mdlParser::Latency_itemsContext; + using LatencyItemCtx = mdlParser::Latency_itemContext; + using ConditionalRefCtx = mdlParser::Conditional_refContext; + using ConditionalElseIfCtx = mdlParser::Conditional_elseifContext; + 
using ConditionalElseCtx = mdlParser::Conditional_elseContext; + using LatencyRefCtx = mdlParser::Latency_refContext; + using LatencySpecCtx = mdlParser::Latency_specContext; + using LatencyResourceRefsCtx = mdlParser::Latency_resource_refsContext; + using LatencyResourceRefCtx = mdlParser::Latency_resource_refContext; + using FusStatementCtx = mdlParser::Fus_statementContext; + using FusItemCtx = mdlParser::Fus_itemContext; + using FusAttributeCtx = mdlParser::Fus_attributeContext; + using ExprCtx = mdlParser::ExprContext; + using OperandCtx = mdlParser::OperandContext; + using OperandRefCtx = mdlParser::Operand_refContext; + using PipeDefCtx = mdlParser::Pipe_defContext; + using PipePhasesCtx = mdlParser::Pipe_phasesContext; + using PhaseIdCtx = mdlParser::Phase_idContext; + + using ResourceDefCtx = mdlParser::Resource_defContext; + using ResourceDeclCtx = mdlParser::Resource_declContext; + using ResourceRefsCtx = mdlParser::Resource_refsContext; + using ResourceRefCtx = mdlParser::Resource_refContext; + using IssueStatementCtx = mdlParser::Issue_statementContext; + using NameListCtx = mdlParser::Name_listContext; + using GroupListCtx = mdlParser::Group_listContext; + using SuBaseListCtx = mdlParser::Su_base_listContext; + using BaseListCtx = mdlParser::Base_listContext; + using RegisterDefCtx = mdlParser::Register_defContext; + using RegisterDeclCtx = mdlParser::Register_declContext; + using RegisterClassCtx = mdlParser::Register_classContext; + using IdentCtx = mdlParser::IdentContext; + using InstructionDefCtx = mdlParser::Instruction_defContext; + using OperandDeclCtx = mdlParser::Operand_declContext; + using OperandDefCtx = mdlParser::Operand_defContext; + using OperandTypeCtx = mdlParser::Operand_typeContext; + using DerivedOperandDefCtx = mdlParser::Derived_operand_defContext; + using PredicateDefCtx = mdlParser::Predicate_defContext; + using PredicateOpCtx = mdlParser::Predicate_opContext; + using PredicateOpndCtx = mdlParser::Pred_opndContext; + using PredicateOpcodeCtx = mdlParser::Pred_opcodeContext; + + using OperandAttributeCtx = mdlParser::Operand_attributeContext; + using OperandAttributeStmtCtx = mdlParser::Operand_attribute_stmtContext; + using TupleCtx = mdlParser::TupleContext; + using PredValueCtx = mdlParser::Pred_valueContext; + using ImportFileCtx = mdlParser::Import_fileContext; + + // Visitor methods for all rules in the grammar. 
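+  // Each Visit method corresponds to one grammar rule and builds the
+  // objects that are added to the MdlSpec.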
+ bool VisitArchitectureSpec(ArchitectureSpecCtx *ctx); + Identifier *VisitFamilySpec(FamilyCtx *ctx); + bool VisitImportFile(ImportFileCtx *ctx); + CpuInstance *VisitCpuDef(CpuDefCtx *ctx); + ClusterInstance *VisitClusterInstantiation(ClusterInstantiationCtx *ctx); + FuncUnitInstance *VisitFuncUnitInstantiation(FuncUnitInstantiationCtx *ctx, + ResourceDefList *resources); + ForwardStmt *VisitForwardStmt(ForwardStmtCtx *ctx); + IdList *VisitPinAll(PinAllCtx *ctx); + IdList *VisitPinAny(PinAnyCtx *ctx); + FuncUnitTemplate *VisitFuncUnitTemplate(FuncUnitTemplateCtx *ctx); + FuncUnitGroup *VisitFuncUnitGroup(FuncUnitGroupCtx *ctx); + ParamsList *VisitFuncUnitParams(FuncUnitParamsCtx *ctx); + ParamsList *VisitFuDeclItem(FuDeclItemCtx *ctx); + IdList *VisitPortDef(PortDefCtx *ctx, ConnectList *connects); + Identifier *VisitPortDecl(PortDeclCtx *ctx, ConnectList *connects); + Connect *VisitConnectStmt(ConnectStmtCtx *ctx); + SubUnitInstList *VisitSubunitInstantiation(SubunitInstantiationCtx *ctx, + ResourceDefList *resources); + SubUnitInstList *VisitSubunitStatement(SubunitStatementCtx *ctx, + IdList *predicate, + ResourceDefList *resources); + SubUnitTemplate *VisitSubunitTemplate(SubunitTemplateCtx *ctx); + ParamsList *VisitSuDeclItems(SuDeclItemsCtx *ctx); + ParamsList *VisitSuDeclItem(SuDeclItemCtx *ctx); + LatencyInstList *VisitLatencyInstance(LatencyInstanceCtx *ctx); + LatencyInstance *VisitLatencyStatement(LatencyStatementCtx *ctx, + IdList *predicate); + LatencyTemplate *VisitLatencyTemplate(LatencyTemplateCtx *ctx); + ReferenceList *VisitLatencyItems(std::vector &ctx); + ReferenceList *VisitLatencyItem(std::vector &ctx, + IdList *predicates); + Reference *VisitConditionalRef(ConditionalRefCtx *ctx, IdList *predicates); + ConditionalRef *VisitConditionalElseIf(ConditionalElseIfCtx *ctx); + ConditionalRef *VisitConditionalElse(ConditionalElseCtx *ctx); + + Reference *VisitLatencyRef(LatencyRefCtx *ctx, IdList *predicates); + ResourceRefList *VisitLatencyResourceRefs(LatencyResourceRefsCtx *ctx); + ResourceRef *VisitLatencyResourceRef(LatencyResourceRefCtx *ctx); + PhaseExpr *VisitExpr(ExprCtx *ctx); + void VisitFusStatement(ReferenceList *refs, FusStatementCtx *ctx); + OperandRef *VisitOperand(OperandCtx *ctx); + PipePhases *VisitPipeDef(PipeDefCtx *ctx); + PhaseNameList *VisitPipePhases(PipePhasesCtx *ctx, bool is_protected, + bool is_hard, PhaseName *&exe_phase); + PhaseNameList *VisitPhaseId(PhaseIdCtx *ctx, bool is_protected, bool is_hard, + bool &is_first); + ResourceDefList *VisitResourceDef(ResourceDefCtx *ctx); + ResourceDef *VisitResourceDecl(ResourceDeclCtx *ctx, Identifier *start, + Identifier *end); + ResourceRefList *VisitResourceRefs(ResourceRefsCtx *ctx, + ResourceDefList *resources = nullptr); + ResourceRef *VisitResourceRef(ResourceRefCtx *ctx, + ResourceDefList *resources = nullptr); + ResourceDefList *VisitIssueStatement(IssueStatementCtx *ctx); + IdList *VisitNameList(NameListCtx *ctx); + IdList *VisitGroupList(GroupListCtx *ctx); + void VisitSuBaseList(SuBaseListCtx *ctx, IdList *&bases, StringList *®ex); + IdList *VisitBaseList(BaseListCtx *ctx); + RegisterDefList *VisitRegisterDef(RegisterDefCtx *ctx); + RegisterDefList *VisitRegisterDecl(RegisterDeclCtx *ctx); + RegisterClass *VisitRegisterClass(RegisterClassCtx *ctx); + + InstructionDef *VisitInstructionDef(InstructionDefCtx *ctx); + + OperandDecl *VisitOperandDecl(OperandDeclCtx *ctx, int opnd_id); + OperandDef *VisitOperandDef(OperandDefCtx *ctx); + OperandDef *VisitDerivedOperandDef(DerivedOperandDefCtx 
*ctx); + OperandAttributeList *VisitOperandAttribute(OperandAttributeCtx *ctx); + OperandAttribute *VisitOperandAttributeStmt(OperandAttributeStmtCtx *ctx, + IdList *predicate); + PredValue *VisitPredValue(PredValueCtx *ctx); + void VisitPredicateDef(PredicateDefCtx *ctx, MdlSpec &spec); + PredExpr *VisitPredicateOp(PredicateOpCtx *ctx); + PredExpr *VisitPredicateOpnd(PredicateOpndCtx *ctx); + + std::vector *VisitTuple(TupleCtx *ctx); + Identifier *VisitIdent(IdentCtx *ctx); + + std::string &current_file_name() { return current_file_name_; } + void set_current_file_name(const std::string &file_name) { + current_file_name_ = file_name; + } + + MdlSpec &spec() { return spec_; } + +private: + MdlSpec &spec_; + std::unordered_set imported_files_; + std::string current_file_name_; + std::string import_path_; +}; + +} // namespace mdl +} // namespace mpact + +#endif // MDL_COMPILER_MDL_VISITOR_H_ diff --git a/llvm/utils/MdlCompiler/mdl_visitor.cpp b/llvm/utils/MdlCompiler/mdl_visitor.cpp new file mode 100644 --- /dev/null +++ b/llvm/utils/MdlCompiler/mdl_visitor.cpp @@ -0,0 +1,1782 @@ +//===- mdl_visitor.cpp - Parse the file and process the parse tree --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Parser driver and visitors for the machine description language. +// +// This file contains the top level parser driver and visitor functions used +// to extract semantic information from the generated parse tree. +// +// IF YOU CHANGE MDL.G4, YOU SHOULD ASSUME YOU HAVE TO CHANGE THIS FILE. +// - some mdl changes will result in compilation failures. +// - some changes could result in runtime crashes. +// +//===----------------------------------------------------------------------===// + +#include "mdl_visitor.h" + +#include +#include +#include +#include +#include +#include + +#include "llvm/Support/Error.h" + +namespace mpact { +namespace mdl { + +//----------------------------------------------------------------------------- +// Create Antlr lexer and parser, then parse the input stream. +// After parsing, visit all the nodes in the tree, collect all the +// CPU definition information and add it to the MdlSpec object. +// Return true if successful, false if errors found. +//----------------------------------------------------------------------------- +bool MdlVisitor::ProcessInputFile(std::string file_name) { + // If we've already seen this file name, not an error, but there's no point + // in parsing it again. + if (!imported_files_.emplace(file_name).second) + return true; + + // Attempt to open the file. + std::fstream *mdl_source = new std::fstream(file_name, std::fstream::in); + if (!mdl_source->is_open()) { + llvm::errs() << formatv("File not found: \"{0}\"\n", file_name); + return false; + } + + // Create an input stream for Antlr. + antlr4::ANTLRInputStream mdl_input; + mdl_input.load(*mdl_source); + + // Create an Antlr lexer. + mdlLexer lexer(&mdl_input); + + // Create a token stream for the parser. + antlr4::CommonTokenStream tokens(&lexer); + + // Create an Antlr parser. + mdlParser parser(&tokens); + + // Create an error listener and attach to parser (to look for syntax errors).
+ MdlErrorListener error_listener(file_name); + parser.removeErrorListeners(); + parser.addErrorListener(&error_listener); + + // Run the parser over the token stream. + ArchitectureSpecCtx *arch = parser.architecture_spec(); + + if (error_listener.syntax_error_count()) + return false; + + // Visit all the nodes in the tree. + std::string old_file_name = current_file_name(); + set_current_file_name(file_name); + + bool success = VisitArchitectureSpec(arch); + set_current_file_name(old_file_name); + return success; +} + +//----------------------------------------------------------------------------- +// Process architecture_spec and architecture_item rules: +// architecture_spec : architectural_item+ +// architectural_item : cpu_def | register_def | resource_def | +// pipe_def | func_unit_template | subunit_template | +// latency_template | import_file; +// +// This is the top-level production. Here we create an object (MdlSpec) +// which contains all of the information in the input description. +//----------------------------------------------------------------------------- +bool MdlVisitor::VisitArchitectureSpec(ArchitectureSpecCtx *ctx) { + bool success = true; + for (auto *item : ctx->architecture_item()) { + if (auto *pipe = item->pipe_def()) { + spec().pipe_phases().push_back(VisitPipeDef(pipe)); + } else if (auto *reg = item->register_def()) { + RegisterDefList *regs = VisitRegisterDef(reg); + spec().registers().insert(spec().registers().end(), regs->begin(), + regs->end()); + } else if (auto *regclass = item->register_class()) { + spec().reg_classes().push_back(VisitRegisterClass(regclass)); + } else if (auto *resdef = item->resource_def()) { + ResourceDefList *res = VisitResourceDef(resdef); + spec().resources().insert(spec().resources().end(), res->begin(), + res->end()); + } else if (auto *cpu = item->cpu_def()) { + spec().cpus().push_back(VisitCpuDef(cpu)); + } else if (auto *func = item->func_unit_template()) { + spec().func_units().push_back(VisitFuncUnitTemplate(func)); + } else if (auto *func = item->func_unit_group()) { + spec().func_unit_groups().push_back(VisitFuncUnitGroup(func)); + } else if (auto *subunit = item->subunit_template()) { + spec().subunits().push_back(VisitSubunitTemplate(subunit)); + } else if (auto *latency = item->latency_template()) { + spec().latencies().push_back(VisitLatencyTemplate(latency)); + } else if (auto *instruction = item->instruction_def()) { + spec().instructions().push_back(VisitInstructionDef(instruction)); + } else if (auto *operand = item->operand_def()) { + spec().operands().push_back(VisitOperandDef(operand)); + } else if (auto *operand = item->derived_operand_def()) { + spec().operands().push_back(VisitDerivedOperandDef(operand)); + } else if (auto *import_file = item->import_file()) { + success &= VisitImportFile(import_file); + } else if (auto *family = item->family_name()) { + spec().set_family_name(VisitFamilySpec(family)); + } else if (auto *pred_def = item->predicate_def()) { + VisitPredicateDef(pred_def, spec()); + } + } + + // Find all the implicit latency templates that were created for subunits, + // and add them to the global set of latency templates. + for (auto *subunit : spec().subunits()) + if (subunit->inline_latency()) + spec().latencies().push_back(subunit->inline_latency()); + + return success; +} + +//----------------------------------------------------------------------------- +// Process a family name specification. 
+//----------------------------------------------------------------------------- +Identifier *MdlVisitor::VisitFamilySpec(FamilyCtx *ctx) { + return VisitIdent(ctx->ident()); +} + +//----------------------------------------------------------------------------- +// Process an import file. +// Return true if successful, false if any errors are found. +//----------------------------------------------------------------------------- +bool MdlVisitor::VisitImportFile(ImportFileCtx *ctx) { + // The literal includes the double quotes. Remove them. + std::string literal = ctx->STRING_LITERAL()->getText(); + std::filesystem::path import_name = literal.substr(1, literal.length() - 2); + + // Get directory names for the current source file and the import file name. + auto current_dir = std::filesystem::path(current_file_name()).parent_path(); + auto import_dir = import_name.parent_path(); + + auto AddSlash = [](std::string path_name) { + if (!path_name.empty() && path_name.back() != '/') + path_name += "/"; + return path_name; + }; + + // If the import name has directory information, use it. + if (!import_dir.empty()) { + if (!current_dir.empty() && !import_dir.is_absolute()) + return ProcessInputFile(formatv("{0}{1}{2}", AddSlash(current_dir), + AddSlash(import_dir), import_name)); + return ProcessInputFile(import_name); + } + + // If the import name doesn't have directory info, see if its in the + // including file's directory. + if (!current_dir.empty()) { + auto name = formatv("{0}{1}", AddSlash(current_dir), import_name); + if (std::filesystem::exists(name)) + return ProcessInputFile(name); + } + + // If both the import dir and current directory are empty, check the current + // directory. + if (std::filesystem::exists(import_name)) + return ProcessInputFile(import_name); + + // If not found in the current directory, look in the import path. + if (!import_path_.empty()) { + auto name = formatv("{0}{1}", AddSlash(import_path_), import_name); + if (std::filesystem::exists(name)) + return ProcessInputFile(name); + } + + // Otherwise, just use the name verbatim. + return ProcessInputFile(import_name); +} + +//----------------------------------------------------------------------------- +// Process cpu_def and cpu_stmt rules: +// cpu_def : CPU|CORE ident '{' cpu_stmt+ '}' +// cpu_stmt: pipe_def | register_def | resource_def | issue_statement | +// cluster_instantiation | func_unit_instantiation ; +//----------------------------------------------------------------------------- +CpuInstance *MdlVisitor::VisitCpuDef(CpuDefCtx *ctx) { + Identifier *name = VisitIdent(ctx->ident()); + auto *pipes = new PipeDefList; + auto *issues = new ResourceDefList; + auto *resources = new ResourceDefList; + int reorder_buffer_size = -1; + auto *func_units = new FuncUnitInstList; + auto *forward_stmts = new ForwardStmtList; + auto *clusters = new ClusterList; + + // Fetch optional names and strip off quotes. Note that we need at least + // one name, so use the CPU name by default. + std::vector aliases; + for (auto *item : ctx->STRING_LITERAL()) { + const auto &alias = item->getText(); + aliases.push_back(alias.substr(1, alias.length() - 2)); + } + if (aliases.empty()) + aliases.push_back(name->name()); + + // for each non-terminal in cpu_stmt, collect information. 
+ for (auto *stmt : ctx->cpu_stmt()) { + if (auto *pipe = stmt->pipe_def()) { + pipes->push_back(VisitPipeDef(pipe)); + } else if (auto *res_def = stmt->resource_def()) { + ResourceDefList *res = VisitResourceDef(res_def); + resources->insert(resources->end(), res->begin(), res->end()); + } else if (auto *func = stmt->func_unit_instantiation()) { + func_units->push_back(VisitFuncUnitInstantiation(func, resources)); + } else if (auto *forward = stmt->forward_stmt()) { + forward_stmts->push_back(VisitForwardStmt(forward)); + } else if (auto *issue = stmt->issue_statement()) { + ResourceDefList *slots = VisitIssueStatement(issue); + issues->insert(issues->end(), slots->begin(), slots->end()); + } else if (auto *cluster = stmt->cluster_instantiation()) { + clusters->push_back(VisitClusterInstantiation(cluster)); + } else if (auto *reorder = stmt->reorder_buffer_def()) + reorder_buffer_size = reorder->size->value; + } + + // If we found functional unit instantiations at the cpu level, create a + // cluster which contains those functional units. (This simplifies things + // downstream.) Resources can remain global, but any issue resources must + // also be associated with the generated cluster. + // If we (still) didn't see any clusters, add an empty one. + // NOTE: Currently, by design clusters are defined as having their own + // issue slots. This enforces that rule. If we want clusters to access + // CPU-level issue slots we'll have to redesign how this works. + MdlItem item(ctx, current_file_name()); + if (clusters->empty() || !func_units->empty()) { + clusters->push_back(new ClusterInstance(item, new Identifier(item, "__"), + issues, new ResourceDefList, + func_units, new ForwardStmtList)); + issues = new ResourceDefList; // reset the issues list. + } + + // Note: forward statements defined at the CPU level stay at the CPU level. + return new CpuInstance(item, name, pipes, issues, resources, + reorder_buffer_size, clusters, forward_stmts, aliases); +} + +//----------------------------------------------------------------------------- +// Process cluster definition rules: +// cluster_instantiation: CLUSTER ident '{' cluster_stmt+ '}' +// cluster_stmt: register_def | resource_def | issue_statement | +// func_unit_instantiation ; +// Return a ClusterInstance object that contains all the information. +//----------------------------------------------------------------------------- +ClusterInstance * +MdlVisitor::VisitClusterInstantiation(ClusterInstantiationCtx *ctx) { + Identifier *name = VisitIdent(ctx->ident()); + auto *issues = new ResourceDefList; + auto *resources = new ResourceDefList; + auto *func_units = new FuncUnitInstList; + auto *forward_stmts = new ForwardStmtList; + + // For each rule in each cluster_stmt, collect and save information. 
+ for (auto *stmt : ctx->cluster_stmt()) { + if (auto *res_def = stmt->resource_def()) { + ResourceDefList *res = VisitResourceDef(res_def); + resources->insert(resources->end(), res->begin(), res->end()); + } else if (auto *func = stmt->func_unit_instantiation()) { + func_units->push_back(VisitFuncUnitInstantiation(func, resources)); + } else if (auto *forward = stmt->forward_stmt()) { + forward_stmts->push_back(VisitForwardStmt(forward)); + } else if (auto *issue = stmt->issue_statement()) { + ResourceDefList *slots = VisitIssueStatement(issue); + issues->insert(issues->end(), slots->begin(), slots->end()); + } + } + + MdlItem item(ctx, current_file_name()); + return new ClusterInstance(item, name, issues, resources, func_units, + forward_stmts); +} + +//----------------------------------------------------------------------------- +// Process func_unit_instantiation rules: +// func_unit_instantiation: +// FUNCUNIT func_unit_instance (':' func_unit_instance)* +// ident '(' resource refs ')' +// ('-> (pin_one | pin_any | pin_all))? ';' ; +// func_unit_instance: ident ('<>' | '<' number '>') ; +// Return a FuncUnitInstance object that contains all the information. +//----------------------------------------------------------------------------- +FuncUnitInstance * +MdlVisitor::VisitFuncUnitInstantiation(FuncUnitInstantiationCtx *ctx, + ResourceDefList *resources) { + auto *type = VisitIdent(ctx->type->ident()); + bool unres = ctx->type->unreserved; + int buffer_size = ctx->type->buffered ? ctx->type->buffered->value : -1; + auto *name = VisitIdent(ctx->ident()); + auto *refs = VisitResourceRefs(ctx->resource_refs(), resources); + IdList *pin_any = nullptr; + IdList *pin_all = nullptr; + MdlItem item(ctx, current_file_name()); + + // If we see a list of bases, create a functional unit template for it. + // Note: we don't currently allow implicitly defined instances to have + // arguments passed to them. If you want that, then define the template! + if (ctx->bases) { + IdList *bases = new IdList; + // Collect some information about base classes. + for (auto *base : ctx->func_unit_bases()) { + auto *instance = base->func_unit_instance(); + auto *base_name = VisitIdent(instance->ident()); + bases->push_back(base_name); + if (instance->buffered) + buffer_size = std::max(buffer_size, (int)instance->buffered->value); + } + + // Create an aggregate functional unit template, including the top-level + // type in the aggregate. + bases->insert(bases->begin(), type); + auto derived = StringVec(bases, "&", "_", ""); + type = new Identifier(*type, derived); + if (FindItem(spec().func_units(), derived) == nullptr) + spec().func_units().push_back(new FuncUnitTemplate(item, type, bases)); + } + + if (ctx->one) { + pin_any = new IdList; + pin_any->push_back(VisitIdent(ctx->one->ident())); + } + + if (ctx->all) + pin_all = VisitPinAll(ctx->all); + if (ctx->any) + pin_any = VisitPinAny(ctx->any); + + return new FuncUnitInstance(item, type, name, unres, buffer_size, refs, + pin_any, pin_all); +} + +//----------------------------------------------------------------------------- +// Process a single CPU forward statement: +// forward_stmt : FORWARD ident '->' forward_to_unit (',' forward_to_unit)? +// forward_to_unit : ident ('(' snumber ')')? 
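+// Illustrative example of this production (unit names are placeholders; see
+// mdl.g4 for the authoritative keyword spelling):
+//   forward A1 -> A2(2), M1;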
+//----------------------------------------------------------------------------- +ForwardStmt *MdlVisitor::VisitForwardStmt(ForwardStmtCtx *ctx) { + ForwardToSet to_units; + Identifier *from_unit = VisitIdent(ctx->ident()); + + for (auto *forward : ctx->forward_to_unit()) { + Identifier *to_unit = VisitIdent(forward->ident()); + int cycles = forward->cycles ? forward->cycles->value : 1; + to_units.emplace_back(to_unit, cycles); + } + + MdlItem item(ctx, current_file_name()); + return new ForwardStmt(item, from_unit, to_units); +} + +//----------------------------------------------------------------------------- +// Process a pin_any rule. +// pin_any: ident ('|' ident)+ ; +// Return a list of names. +//----------------------------------------------------------------------------- +IdList *MdlVisitor::VisitPinAny(PinAnyCtx *ctx) { + auto *names = new IdList; + for (auto *name_ctx : ctx->ident()) + names->push_back(VisitIdent(name_ctx)); + return names; +} + +//----------------------------------------------------------------------------- +// Process a pin_all rule. +// pin_all: ident ('&' ident)+ ; +// Return a list of names. +//----------------------------------------------------------------------------- +IdList *MdlVisitor::VisitPinAll(PinAllCtx *ctx) { + auto *names = new IdList; + for (auto *name_ctx : ctx->ident()) + names->push_back(VisitIdent(name_ctx)); + return names; +} + +//----------------------------------------------------------------------------- +// Process function_unit_template and func_unit_template_stmt rules: +// func_unit_template : FUNCUNIT ident (':' base) '(' func_unit_params? ')' +// '{' func_unit_template_stmt* '}' +// func_unit_template_stmt: resource_def | port_def | +// connect_stmt | subunit_instantiation ; +// Return a FuncUnitTemplate object that contains all the information. +//----------------------------------------------------------------------------- +FuncUnitTemplate *MdlVisitor::VisitFuncUnitTemplate(FuncUnitTemplateCtx *ctx) { + auto *ports = new IdList; + auto *resources = new ResourceDefList; + auto *connects = new ConnectList; + auto *subunits = new SubUnitInstList; + + Identifier *type = VisitIdent(ctx->type); + IdList *bases = VisitBaseList(ctx->base); + ParamsList *params = VisitFuncUnitParams(ctx->func_unit_params()); + + // Process each rule matched in template statements, save off info. + for (auto *stmt : ctx->func_unit_template_stmt()) { + if (auto *res_def = stmt->resource_def()) { + ResourceDefList *res = VisitResourceDef(res_def); + resources->insert(resources->end(), res->begin(), res->end()); + } else if (auto *port_def = stmt->port_def()) { + IdList *defs = VisitPortDef(port_def, connects); + ports->insert(ports->end(), defs->begin(), defs->end()); + } else if (auto *conn = stmt->connect_stmt()) { + connects->push_back(VisitConnectStmt(conn)); + } else if (auto *subunit = stmt->subunit_instantiation()) { + SubUnitInstList *items = VisitSubunitInstantiation(subunit, resources); + subunits->insert(subunits->end(), items->begin(), items->end()); + } + } + + MdlItem item(ctx, current_file_name()); + return new FuncUnitTemplate(item, type, bases, params, ports, resources, + connects, subunits); +} + +//----------------------------------------------------------------------------- +// Process a functional unit group definition: +// func_unit_group : FUNCGROUP ident ('<' number '>')? 
':' name_list ';' +//----------------------------------------------------------------------------- +FuncUnitGroup *MdlVisitor::VisitFuncUnitGroup(FuncUnitGroupCtx *ctx) { + Identifier *name = VisitIdent(ctx->name); + IdList *members = VisitNameList(ctx->members); + int buffer_size = ctx->buffered ? ctx->buffered->value : -1; + + MdlItem item(ctx, current_file_name()); + return new FuncUnitGroup(item, name, buffer_size, members); +} + +//----------------------------------------------------------------------------- +// Process func_unit_params rules: (arguments to a functional unit template). +// func_unit_params : fu_decl_item (';' fu_decl_item)* +// Return a vector of parameters. +//----------------------------------------------------------------------------- +ParamsList *MdlVisitor::VisitFuncUnitParams(FuncUnitParamsCtx *ctx) { + auto *params = new ParamsList; + if (ctx == nullptr) + return params; // Return if zero parameters. + + for (auto *decl : ctx->fu_decl_item()) { + ParamsList *param = VisitFuDeclItem(decl); + params->insert(params->end(), param->begin(), param->end()); + } + return params; // return the list of parameters. +} + +//----------------------------------------------------------------------------- +// Process fu_decl_item rules: +// func_decl_item : RESOURCE name_list | CLASS name_list +// Each namelist can define a list of parameters. We want to flatten those +// lists to a single list of resources and classes, and return a single list +// of class and resource definitions. +//----------------------------------------------------------------------------- +ParamsList *MdlVisitor::VisitFuDeclItem(FuDeclItemCtx *ctx) { + auto *params = new ParamsList; + ParamType type = ctx->RESOURCE() ? kParamResource : kParamClass; + IdList *names = VisitNameList(ctx->name_list()); + + for (auto *name : *names) { + MdlItem item(*name); + params->push_back(new Params(item, name, type)); + } + + return params; +} + +//----------------------------------------------------------------------------- +// Process the port_def rules: (part of a functional unit template definition). +// PORT port_decl (',' port_decl )* +// Return a list of port definitions. +//----------------------------------------------------------------------------- +IdList *MdlVisitor::VisitPortDef(PortDefCtx *ctx, ConnectList *connects) { + auto *names = new IdList; + + // Append the port definition names + for (auto *item : ctx->port_decl()) + names->push_back(VisitPortDecl(item, connects)); + + return names; +} + +//----------------------------------------------------------------------------- +// Process a single port definition. The definition may optionally include +// a register class and a list of resource references. +// ident ('<' reg_class=ident '>')? ('(' ref=resource_ref ')')? +// If a declaration contains connection information, create CONNECT records. +//----------------------------------------------------------------------------- +Identifier *MdlVisitor::VisitPortDecl(PortDeclCtx *ctx, ConnectList *connects) { + auto *name = VisitIdent(ctx->name); + + if (ctx->reg_class != nullptr || ctx->ref != nullptr) { + auto *reg_class = VisitIdent(ctx->reg_class); + auto *ref = VisitResourceRef(ctx->ref); + MdlItem item(ctx, current_file_name()); + connects->push_back(new Connect(item, name, reg_class, ref)); + } + return name; +} + +//----------------------------------------------------------------------------- +// Process connect_stmt rules: (part of a functional unit template definition). +// CONNECT ident ('to' ident)? 
('via' resource_ref)? ';' ; +// Return a Connect object that contains all the information. +//----------------------------------------------------------------------------- +Connect *MdlVisitor::VisitConnectStmt(ConnectStmtCtx *ctx) { + auto *port = VisitIdent(ctx->port); + auto *reg_class = VisitIdent(ctx->reg_class); + auto *ref = VisitResourceRef(ctx->resource_ref()); + + MdlItem item(ctx, current_file_name()); + return new Connect(item, port, reg_class, ref); +} + +//----------------------------------------------------------------------------- +// Process subunit_instantiation rules: (also part of func unit definitions). +// subunit_instantiation: +// (predicate=name_list ':')? subunit_statement +// | (predicate=name_list ':' '{' subunit_statement* '}' ';' +// Return a SubUnitInstance object that contains the information. +//----------------------------------------------------------------------------- +SubUnitInstList * +MdlVisitor::VisitSubunitInstantiation(SubunitInstantiationCtx *ctx, + ResourceDefList *resources) { + IdList *predicate = VisitNameList(ctx->predicate); + auto *statements = new SubUnitInstList; + + for (auto *stmt : ctx->subunit_statement()) { + auto subunits = VisitSubunitStatement(stmt, predicate, resources); + statements->insert(statements->end(), subunits->begin(), subunits->end()); + } + + return statements; +} + +//----------------------------------------------------------------------------- +// Process subunit_statement rules: +// subunit_statement: SUBUNIT subunit_instance (',' subunit_instance)* ';' +// subunit_instance: ident '(' resource_refs ')' +//----------------------------------------------------------------------------- +SubUnitInstList *MdlVisitor::VisitSubunitStatement(SubunitStatementCtx *ctx, + IdList *predicate, + ResourceDefList *resources) { + auto *subunits = new SubUnitInstList; + + for (auto *instance : ctx->subunit_instance()) { + auto *name = VisitIdent(instance->ident()); + auto *args = VisitResourceRefs(instance->resource_refs(), resources); + + MdlItem item(ctx, current_file_name()); + subunits->push_back(new SubUnitInstance(item, name, args, predicate)); + } + return subunits; +} + +//----------------------------------------------------------------------------- +// Process subunit_template rules: +// subunit_template: SUBUNIT ident (':' ident)? '(' su_decl_items ')' +// (('{' subunit_body* '}') | ('{{' latency_items? '}}') ); +// Return a SubUnitTemplate object that contains all the information. +//----------------------------------------------------------------------------- +SubUnitTemplate *MdlVisitor::VisitSubunitTemplate(SubunitTemplateCtx *ctx) { + Identifier *name = VisitIdent(ctx->name); + ParamsList *params = VisitSuDeclItems(ctx->su_decl_items()); + + IdList *bases = nullptr; + StringList *regex = nullptr; + VisitSuBaseList(ctx->base, bases, regex); + + auto *latencies = new LatencyInstList; + + LatencyTemplate *inline_lat = nullptr; + MdlItem item(ctx, current_file_name()); + + // If the body of the subunit is a list of subunit statements, return a list + // of those instantiations. 
+ if (ctx->body) { + for (auto *stmt : ctx->subunit_body()) { + if (auto *latency = stmt->latency_instance()) { + auto *lats = (VisitLatencyInstance(latency)); + latencies->insert(latencies->end(), lats->begin(), lats->end()); + continue; + } + } + } + + // If the body of the subunit is an inlined latency template, create a + // new latency template (with the same name as the subunit), and create a + // latency instance which refers to that latency template. The created + // latency template will be returned as part of the subunit. + if (!ctx->latency_items().empty()) { + // Create a new latency template. + Identifier *tname = VisitIdent(ctx->name); + ParamsList *tparams = VisitSuDeclItems(ctx->su_decl_items()); + auto items = ctx->latency_items(); + ReferenceList *refs = VisitLatencyItems(items); + inline_lat = new LatencyTemplate(item, tname, nullptr, tparams, refs); + + // Create an instance for the new latency template for this subunit. + Identifier *name = VisitIdent(ctx->name); + ResourceRefList *args = new ResourceRefList; + for (auto *param : *params) + args->push_back(new ResourceRef(item, param->id())); + latencies->push_back(new LatencyInstance(item, name, args, nullptr)); + } + + return new SubUnitTemplate(item, name, bases, regex, params, latencies, + inline_lat); +} + +//----------------------------------------------------------------------------- +// Process su_decl_items rules: (part of a subunit template definition). +// su_decl_items: su_decl_item (';' subunit_decl_item)* ; +// Return a list of subunit parameters. +//----------------------------------------------------------------------------- +ParamsList *MdlVisitor::VisitSuDeclItems(SuDeclItemsCtx *ctx) { + auto *params = new ParamsList; + + if (ctx == nullptr) + return params; // Return if no parameters. + + // Append the lists of resources or ports together. + for (auto *item : ctx->su_decl_item()) { + ParamsList *param = VisitSuDeclItem(item); + params->insert(params->end(), param->begin(), param->end()); + } + return params; // return the list of declared items. +} + +//----------------------------------------------------------------------------- +// Process su_decl_item rules: (part of a subunit template definition). +// su_decl_item: RESOURCE name_list | PORT name_list ; +// Return a list of resource or port parameter definitions. +//----------------------------------------------------------------------------- +ParamsList *MdlVisitor::VisitSuDeclItem(SuDeclItemCtx *ctx) { + auto *params = new ParamsList; + ParamType type = ctx->RESOURCE() ? kParamResource : kParamPort; + IdList *names = VisitNameList(ctx->name_list()); + + for (auto *name : *names) { + MdlItem item(*name); + params->push_back(new Params(item, name, type)); + } + + return params; // return the list of resources or ports. +} + +//----------------------------------------------------------------------------- +// Process latency_instance rules: (part of a subunit template definition). +// latency_instance: (predicate=name_list ':')? latency_statement +// | (predicate=name_list ':' '{' latency_statement* '}' ';' +// Return a LatencyInstance object that contains all the information. 
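+// Illustrative example (names are placeholders; exact keyword spelling is
+// defined by mdl.g4):
+//   latency L1(port_a, res_b);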
+//----------------------------------------------------------------------------- +LatencyInstList *MdlVisitor::VisitLatencyInstance(LatencyInstanceCtx *ctx) { + if (ctx == nullptr) + return nullptr; + + IdList *predicates = VisitNameList(ctx->predicate); + auto *statements = new LatencyInstList; + + for (auto *stmt : ctx->latency_statement()) + statements->push_back(VisitLatencyStatement(stmt, predicates)); + + return statements; +} + +//----------------------------------------------------------------------------- +// Process latency_statement rules: +// latency_instance: LATENCY ident '(' name_list ')' ';' ; +// Return a LatencyInstance object that contains all the information. +//----------------------------------------------------------------------------- +LatencyInstance *MdlVisitor::VisitLatencyStatement(LatencyStatementCtx *ctx, + IdList *predicates) { + auto *name = VisitIdent(ctx->ident()); + auto *args = VisitResourceRefs(ctx->resource_refs()); + + MdlItem item(ctx, current_file_name()); + return new LatencyInstance(item, name, args, predicates); +} + +//----------------------------------------------------------------------------- +// Process latency_template rules. +// latency_template: LATENCY ident (':' ident)* '(' su_decl_items ')' +// '{' latency_items? '}' ; +// Return a LatencyTemplate object that contains all the information. +//----------------------------------------------------------------------------- +LatencyTemplate *MdlVisitor::VisitLatencyTemplate(LatencyTemplateCtx *ctx) { + Identifier *name = VisitIdent(ctx->name); + IdList *base = VisitBaseList(ctx->base); + + ParamsList *params = VisitSuDeclItems(ctx->su_decl_items()); + auto items = ctx->latency_items(); + ReferenceList *refs = VisitLatencyItems(items); + + MdlItem item(ctx, current_file_name()); + return new LatencyTemplate(item, name, base, params, refs); +} + +//----------------------------------------------------------------------------- +// Process latency_items rules: (part of a latency template definition). +// latency_items: latency_item? +// Return a list of Reference objects. +//----------------------------------------------------------------------------- +ReferenceList * +MdlVisitor::VisitLatencyItems(std::vector &ctx) { + auto *references = new ReferenceList; + for (auto *lat_item : ctx) { + IdList *predicates = VisitNameList(lat_item->predicate); + auto item = lat_item->latency_item(); + auto *refs = VisitLatencyItem(item, predicates); + references->insert(references->end(), refs->begin(), refs->end()); + } + return references; +} + +//----------------------------------------------------------------------------- +// Process latency_item rules: (part of a latency template definition). +// latency_item : latency_ref +// | conditional_ref +// | fu_statement ; +// Return a single Reference object that describes a single latency. +//----------------------------------------------------------------------------- +ReferenceList *MdlVisitor::VisitLatencyItem(std::vector &ctx, + IdList *predicates) { + ReferenceList *refs = new ReferenceList; + for (auto *lat : ctx) { + if (lat->latency_ref()) + refs->push_back(VisitLatencyRef(lat->latency_ref(), predicates)); + if (lat->conditional_ref()) + refs->push_back(VisitConditionalRef(lat->conditional_ref(), predicates)); + if (lat->fus_statement()) + VisitFusStatement(refs, lat->fus_statement()); + } + return refs; +} + +//----------------------------------------------------------------------------- +// Process a conditional reference statement. 
+// conditional_ref : 'if' ident '{' latency_item* '}' +// (conditional_elseif | conditional_else)? +// conditional_elseif : 'else' 'if' ident '{' latency_item* '}' +// (conditional_elseif | conditional_else)? +// conditional_else : 'else' '{' latency_item* '}' +//----------------------------------------------------------------------------- +Reference *MdlVisitor::VisitConditionalRef(ConditionalRefCtx *ctx, + IdList *predicates) { + auto *pred = VisitIdent(ctx->ident()); + auto lat = ctx->latency_item(); + auto *refs = VisitLatencyItem(lat, nullptr); + + auto *elseif = VisitConditionalElseIf(ctx->conditional_elseif()); + auto *ref_else = VisitConditionalElse(ctx->conditional_else()); + auto else_clause = elseif ? elseif : ref_else; + + MdlItem item(ctx, current_file_name()); + auto *ref_if = new ConditionalRef(item, pred, refs, else_clause); + return new Reference(item, predicates, ref_if); +} + +ConditionalRef *MdlVisitor::VisitConditionalElseIf(ConditionalElseIfCtx *ctx) { + if (ctx == nullptr) + return nullptr; + auto *pred = VisitIdent(ctx->ident()); + auto lat = ctx->latency_item(); + auto *refs = VisitLatencyItem(lat, nullptr); + ConditionalRef *elseif = nullptr; + ConditionalRef *ref_else = nullptr; + + if (ctx->conditional_elseif()) + elseif = VisitConditionalElseIf(ctx->conditional_elseif()); + if (ctx->conditional_else()) + ref_else = VisitConditionalElse(ctx->conditional_else()); + auto else_clause = elseif ? elseif : ref_else; + + MdlItem item(ctx, current_file_name()); + return new ConditionalRef(item, pred, refs, else_clause); +} + +ConditionalRef *MdlVisitor::VisitConditionalElse(ConditionalElseCtx *ctx) { + if (ctx == nullptr) + return nullptr; + auto lat = ctx->latency_item(); + auto *refs = VisitLatencyItem(lat, nullptr); + + MdlItem item(ctx, current_file_name()); + return new ConditionalRef(item, nullptr, refs, nullptr); +} + +//----------------------------------------------------------------------------- +// Process latency_ref rules: (part of a latency template definition). +// latency_ref: ref_type '(' latency_spec ')' ';' +// latency_spec: phase_expr (':' cycles=number)? ',' resource_refs +// | phase_expr ('[' repeat=number (',' delay=number)? ']')? +// ',' operand +// | phase_expr ',' operand ',' resource_refs ; +// Return a single Reference object that describes a single latency. +//----------------------------------------------------------------------------- +Reference *MdlVisitor::VisitLatencyRef(LatencyRefCtx *ctx, IdList *predicates) { + auto ref_type = StringToRefType(ctx->ref_type()->getText()); + auto *spec = ctx->latency_spec(); + + auto *phase = VisitExpr(spec->expr()); + auto *opnd = VisitOperand(spec->operand()); + auto *refs = VisitLatencyResourceRefs(spec->latency_resource_refs()); + int cycles = 1; + if (spec->cycles) + cycles = spec->cycles->value; + int repeat = 1; + if (spec->repeat) + repeat = spec->repeat->value; + int delay = 1; + if (spec->delay) + delay = spec->delay->value; + + MdlItem item(ctx, current_file_name()); + return new Reference(item, predicates, ref_type, phase, repeat, delay, cycles, + opnd, refs); +} + +//----------------------------------------------------------------------------- +// Process expr rules. These are part of latency specs, and are a limited +// set of operations for calculating pipeline latencies: +// expr: '-' expr | expr ('*'|'/') expr | expr ('+'|'-') expr | +// '(' expr ')' | number | operand ; +// Return a single expression tree root. 
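+// Illustrative examples (phase and operand names are placeholders):
+//   E2 + 1          // a phase name plus a constant
+//   ($dst * 2)      // a parenthesized expression over an operand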
+//----------------------------------------------------------------------------- +PhaseExpr *MdlVisitor::VisitExpr(ExprCtx *ctx) { + // If expression is missing (for fus entries) just return nullptr; + if (ctx == nullptr) + return nullptr; + + MdlItem item(ctx, current_file_name()); + + // Handle unary negation. + if (ctx->negate) + return new PhaseExpr(item, kNeg, VisitExpr(ctx->negate), nullptr); + + // Handle multiply and divide. + if (ctx->mop) { + PhaseOp op = ctx->mop->getText()[0] == '*' ? kMult : kDiv; + return new PhaseExpr(item, op, VisitExpr(ctx->left), VisitExpr(ctx->right)); + } + + // Handle addition and subtraction. + if (ctx->aop) { + PhaseOp op = ctx->aop->getText()[0] == '+' ? kPlus : kMinus; + return new PhaseExpr(item, op, VisitExpr(ctx->left), VisitExpr(ctx->right)); + } + + // Handle a phase name. + if (ctx->phase_name) + return new PhaseExpr(item, kPhase, VisitIdent(ctx->phase_name)); + + // Handle a "truncate to positive" operator {...}. + if (ctx->posexpr) { + return new PhaseExpr(item, kPositive, VisitExpr(ctx->posexpr), nullptr); + } + + // Handle a parenthesised subexpression. + if (ctx->subexpr) + return VisitExpr(ctx->subexpr); + + // Handle a literal constant. + if (ctx->num) + return new PhaseExpr(item, kInt, ctx->num->value); + + // Handle an instruction operand reference. + OperandRef *opnd = VisitOperand(ctx->opnd); + return new PhaseExpr(item, kOpnd, opnd); +} + +//----------------------------------------------------------------------------- +// Process operand rules. Operands are part of latency expressions, and +// refer to operands in the target instructions. +// operand: (ident ':')? '$' ident ('.' operand_ref)* +// | (ident ':')? '$' number +// | (ident ':')? '$$' number +// This syntax corresponds closely to operands in llvm td files. +// A "$number" operand refers directly to an operand by index. +// A "$$number" operand refers to a variadic operand, by index (1,2,3...). +// Return an OperandRef, which references an operand of an instruction. +//----------------------------------------------------------------------------- +OperandRef *MdlVisitor::VisitOperand(OperandCtx *ctx) { + if (ctx == nullptr) + return nullptr; + auto *operand = new IdList; + Identifier *type = VisitIdent(ctx->type); + Identifier *opnd = ctx->opnd ? VisitIdent(ctx->opnd) : nullptr; + + // Handle the normal case of an operand name. + if (opnd) + operand->push_back(opnd); + // Handle an operand index reference ($). + if (ctx->opnd_id) { + MdlItem item(ctx->opnd_id, current_file_name()); + operand->push_back(new Identifier(item, ctx->opnd_id->getText())); + } + // Handle a variadic operand index reference ($$). + if (ctx->var_opnd_id) { + MdlItem item(ctx->var_opnd_id, current_file_name()); + operand->push_back( + new Identifier(item, formatv("${0}", ctx->var_opnd_id->value))); + } + + // If there are operand qualifiers (suboperands), add them to the list. + for (auto *ref_ctx : ctx->operand_ref()) { + MdlItem item(ref_ctx, current_file_name()); + operand->push_back(new Identifier(item, ref_ctx->getText())); + } + + MdlItem item(ctx, current_file_name()); + return new OperandRef(item, type, operand); +} + +//----------------------------------------------------------------------------- +// Process a functional unit usage statement. +// fu_statement: FUS '(' fus_item ('&' fus_item)* ')' ',' snumber +// (',' fus_attribute)* ')' ';' +// fus_item: name=ident ('<' (expr ':')? number '>')? 
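+// Illustrative example based on the rule above (unit, attribute, and keyword
+// spellings are placeholders; see mdl.g4):
+//   fus(ALU0<2> & MUL0, 1, BeginGroup);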
+//----------------------------------------------------------------------------- +void MdlVisitor::VisitFusStatement(ReferenceList *refs, FusStatementCtx *ctx) { + MdlItem item(ctx, current_file_name()); + int micro_ops = ctx->micro_ops->value; + + // If we've seen functional unit attributes, collect them. + RefFlags::Item flags = RefFlags::kNone; + for (auto flag : ctx->fus_attribute()) { + if (flag->getText() == "BeginGroup") + flags |= RefFlags::kBeginGroup; + else if (flag->getText() == "EndGroup") + flags |= RefFlags::kEndGroup; + else if (flag->getText() == "SingleIssue") + flags |= RefFlags::kSingleIssue; + else if (flag->getText() == "RetireOOO") + flags |= RefFlags::kRetireOOO; + } + + // If there weren't any functional units specified, generate a single + // reference. + if (ctx->fus_item().empty()) { + MdlItem ref(ctx, current_file_name()); + refs->push_back(new Reference(item, micro_ops, flags)); + return; + } + + for (auto *funit : ctx->fus_item()) { + MdlItem ref(funit, current_file_name()); + auto *fu = VisitIdent(funit->ident()); + auto *expr = VisitExpr(funit->expr()); + int cycles = funit->number() ? funit->number()->value : 1; + refs->push_back(new Reference(item, RefTypes::kFus, expr, cycles, micro_ops, + flags, new ResourceRef(ref, fu))); + micro_ops = 0; // Only include micro-ops on the first unit. + } +} + +//----------------------------------------------------------------------------- +// Process pipe_def rules. This is a top-level description of the +// processor's pipeline phases: +// pipe_def: protection? PIPE_PHASE ident '{' pipe_phases '}' ';' ; +// Return a PipePhases object which contains the names of pipeline +// phases of a CPU, and the protected attribute of the pipeline. +//----------------------------------------------------------------------------- +PipePhases *MdlVisitor::VisitPipeDef(PipeDefCtx *ctx) { + // Determine if this is a protected or unprotected pipeline. If you don't + // specify it, we assume it's protected. + bool is_protected = true; + bool is_hard = false; + if (auto *protect = ctx->protection()) { + if (protect->UNPROTECTED()) + is_protected = false; + if (protect->HARD()) + is_hard = true; + } + Identifier *name = VisitIdent(ctx->ident()); + PhaseName *exe_phase = nullptr; + auto *phases = + VisitPipePhases(ctx->pipe_phases(), is_protected, is_hard, exe_phase); + + MdlItem item(ctx, current_file_name()); + return new PipePhases(item, name, phases, exe_phase, is_protected, is_hard); +} + +//----------------------------------------------------------------------------- +// Process pipe_phases rules. These correspond to a set of pipeline phases. +// pipe_phases: phase_id (',' phase_id)* ; +// Return a list of pipeline phase names. +//----------------------------------------------------------------------------- +PhaseNameList *MdlVisitor::VisitPipePhases(PipePhasesCtx *ctx, + bool is_protected, bool is_hard, + PhaseName *&exe_phase) { + auto *phases = new PhaseNameList; + bool is_first; + + // Since each phase_id can return a list, append the lists together. + for (auto *phase_ctx : ctx->phase_id()) { + auto *phaseset = VisitPhaseId(phase_ctx, is_protected, is_hard, is_first); + if (is_first && exe_phase == nullptr) + exe_phase = phaseset->front(); + phases->insert(phases->end(), phaseset->begin(), phaseset->end()); + } + + // After we've seen all the phases, assign phase indexes to each: + // If a value was provided (e.g. name=23), use that value as the next id + // to use. If not, just assign the next sequential id.
NOTE that this +// explicitly allows duplicates!! + int phase_index = 0; + for (auto *phase : *phases) { + if (phase->index() == -1) + phase->set_index(phase_index++); + else + phase_index = phase->index() + 1; + } + + return phases; +} + +//----------------------------------------------------------------------------- +// Process phase_id rules. These return a single name or a set of names. +// phase_id: ident ('[' range ']') ('=' number)? ; +// Return a list of (possibly one) pipeline phase names. +//----------------------------------------------------------------------------- +PhaseNameList *MdlVisitor::VisitPhaseId(PhaseIdCtx *ctx, bool is_protected, + bool is_hard, bool &is_first) { + auto *phases = new PhaseNameList; + Identifier *phase = VisitIdent(ctx->ident()); + MdlItem item(ctx, current_file_name()); + + // If a range was not specified, just return the name we found. + if (!ctx->range()) { + phases->push_back( + new PhaseName(item, phase->name(), is_protected, is_hard)); + } else { + // If a range was specified, create a vector of names (range inclusive). + int first = ctx->range()->first->value; + int last = ctx->range()->last->value; + if (first > last) + first = last; // silly, but sanity check it. + + for (int id = first; id <= last; id++) + phases->push_back(new PhaseName( + item, formatv("{0}{1}", phase->name(), id), is_protected, is_hard)); + } + + // If a number was specified, set the first index in the list. + if (ctx->number()) + phases->front()->set_index(ctx->number()->value); + + is_first = (ctx->first_exe != nullptr); + return phases; +} + +//----------------------------------------------------------------------------- +// Process resource_def rules, return a list of defs. +// resource_def : RESOURCE resource_decl (',' resource_decl)* ';' +// An alternate form allows you to specify a range of phase ids associated +// with a resource: +// resource_def : RESOURCE '(' phase ('..' phase)? ')' resource_decl... +// Return a list of resource definitions. +//----------------------------------------------------------------------------- +ResourceDefList *MdlVisitor::VisitResourceDef(ResourceDefCtx *ctx) { + auto *defs = new ResourceDefList; + Identifier *start = VisitIdent(ctx->start); + Identifier *end = VisitIdent(ctx->end); + + for (auto *def_ctx : ctx->resource_decl()) + defs->push_back(VisitResourceDecl(def_ctx, start, end)); + + return defs; +} + +//----------------------------------------------------------------------------- +// Process resource_decl rules, which define a single resource. +// resource_decl: ident (':' number)? ('[' number ']')? +// | ident (':' number)? '{' name_list '}' ; +// | ident (':' number)? '{' group_list '}' ; +// This handles resource definitions of the forms: +// resource name; // a single named resource. +// resource name:4; // a resource with 4 bits of data. +// resource name[6]; // a pool of 6 resources. +// resource name:31[5]; // a pool of 5 31-bit resources. +// resource name:6 { member1, member2 }; // a pool of two named resources. +// Return a ResourceDef object that describes a single resource. +//----------------------------------------------------------------------------- +ResourceDef *MdlVisitor::VisitResourceDecl(ResourceDeclCtx *ctx, + Identifier *start, Identifier *end) { + Identifier *name = VisitIdent(ctx->ident()); + MdlItem item(ctx, current_file_name()); + + int bits = ctx->bits ? ctx->bits->value : -1; + int count = ctx->count ?
ctx->count->value : -1; + + if (ctx->name_list()) { + IdList *members = VisitNameList(ctx->name_list()); + return new ResourceDef(item, name, bits, members, start, end); + } + + if (ctx->group_list()) { + IdList *members = VisitGroupList(ctx->group_list()); + auto *group = new ResourceDef(item, name, bits, members, start, end); + group->set_group_type(ctx->group_list()->group_and != nullptr + ? GroupType::kUseAll + : GroupType::kUseSingle); + return group; + } + + return new ResourceDef(item, name, bits, count, start, end); +} + +//----------------------------------------------------------------------------- +// Process latency resource_refs rules. +// latency_resource_refs: latency_resource_ref (',' latency_resource_ref)* +// Return a list of resource references. +//----------------------------------------------------------------------------- +ResourceRefList * +MdlVisitor::VisitLatencyResourceRefs(LatencyResourceRefsCtx *ctx) { + auto *refs = new ResourceRefList; + if (ctx == nullptr) + return refs; + + for (auto *ref_ctx : ctx->latency_resource_ref()) + refs->push_back(VisitLatencyResourceRef(ref_ctx)); + + return refs; +} + +//----------------------------------------------------------------------------- +// Process resource_ref rules. Handle a single resource reference. +// resource_ref: resource_ref ':' count=number (':' value=ident)? +// | resource_ref ':' countname=ident (':' value=ident)? +// | resource_ref (':' ':' value=ident)? +// | resource_ref ':' all='*' +// | resource_ref +// Return a single resource reference object. +//----------------------------------------------------------------------------- +ResourceRef *MdlVisitor::VisitLatencyResourceRef(LatencyResourceRefCtx *ctx) { + ResourceRef *res = VisitResourceRef(ctx->resource_ref()); + Identifier *countname = VisitIdent(ctx->countname); + Identifier *value_mask = VisitIdent(ctx->value); + int count = ctx->count ? ctx->count->value : -1; + MdlItem item(ctx, current_file_name()); + + if (value_mask) + res->set_value_name(value_mask); + if (ctx->countname) + res->set_pool_count_name(countname); + if (ctx->count) + res->set_pool_count(count); + if (ctx->all) + res->set_use_all_members(); + return res; +} + +//----------------------------------------------------------------------------- +// Process resource_refs rules. Return a list of resource references. +// resource_refs: resource_ref (',' resource_ref)* +// Return a list of resource references. +//----------------------------------------------------------------------------- +ResourceRefList * +MdlVisitor::VisitResourceRefs(ResourceRefsCtx *ctx, + ResourceDefList *resources /* = nullptr */) { + auto *refs = new ResourceRefList; + + if (ctx == nullptr) + return refs; + + for (auto *ref_ctx : ctx->resource_ref()) + refs->push_back(VisitResourceRef(ref_ctx, resources)); + + return refs; +} + +//----------------------------------------------------------------------------- +// Process resource_ref rules. Handle a single resource reference. +// resource_ref : ident '.' ident // specify which member +// | ident '[' range ']' // specify a range of members +// | ident '[' number ']' // specify a single member +// | ident ('|' ident)+ // implicitly defined group +// | ident ('&' ident)+ // implicitly defined group +// | ident ; +// Return a single resource reference object. 
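+// Illustrative examples of each form (all names are placeholders):
+//   xbar.port0, regs[0..3], slots[2], A1 | A2, R1 & R2, bus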
+//----------------------------------------------------------------------------- +ResourceRef * +MdlVisitor::VisitResourceRef(ResourceRefCtx *ctx, + ResourceDefList *resources /* = nullptr */) { + if (ctx == nullptr) + return nullptr; + Identifier *name = VisitIdent(ctx->name); + MdlItem item(ctx, current_file_name()); + + if (ctx->member) { + Identifier *id = VisitIdent(ctx->member); + return new ResourceRef(item, name, id); + } + if (ctx->range()) + return new ResourceRef(item, name, ctx->range()->first->value, + ctx->range()->last->value); + if (ctx->index) + return new ResourceRef(item, name, ctx->index->value, ctx->index->value); + + // Handle implicitly defined resource groups. + if (ctx->group_and || ctx->group_or) { + // We only allow implicitly defined groups in some circumstances. + if (resources == nullptr) { + spec().ErrorLog(&item, "Implicit Group Definition not allowed"); + return new ResourceRef(item, name); + } + + IdList *members = new IdList; + int index = 0; + for (auto *member : ctx->ident()) { + auto *newmem = VisitIdent(member); + newmem->set_index(index++); + members->push_back(newmem); + } + + // Create a resource group in the scope surrounding the reference. + static int implicit_id = 0; + name = new Identifier(item, formatv("", implicit_id++)); + auto *def = new ResourceDef(item, name, -1, members, nullptr, nullptr); + def->set_implicit_group(); + def->set_group_type(ctx->group_and != nullptr ? GroupType::kUseAll + : GroupType::kUseSingle); + resources->push_back(def); + return new ResourceRef(item, name); + } + + return new ResourceRef(item, name); +} + +//----------------------------------------------------------------------------- +// Process an issue_statement rule: +// issue_statement : ISSUE ('(' phase ('..' phase)? ')'? name_list ';' +// Return a list of resource definitions. +//----------------------------------------------------------------------------- +ResourceDefList *MdlVisitor::VisitIssueStatement(IssueStatementCtx *ctx) { + auto *defs = new ResourceDefList; + Identifier *start = VisitIdent(ctx->start); + Identifier *end = VisitIdent(ctx->end); + IdList *slots = VisitNameList(ctx->name_list()); + + MdlItem item(ctx, current_file_name()); + for (auto *id : *slots) + defs->push_back(new ResourceDef(item, id, -1, -1, start, end)); + + return defs; +} + +//----------------------------------------------------------------------------- +// Process a name_list rule. +// name_list: ident (',' ident)* ; +// Return a list of names. +//----------------------------------------------------------------------------- +IdList *MdlVisitor::VisitNameList(NameListCtx *ctx) { + if (ctx == nullptr) + return nullptr; + + auto *names = new IdList; + for (auto *name_ctx : ctx->ident()) + names->push_back(VisitIdent(name_ctx)); + int index = 0; + for (auto *name : *names) + name->set_index(index++); + return names; +} + +//----------------------------------------------------------------------------- +// Process a name_list rule. +// name_list: ident (',' ident)* ; +// Return a list of names. +//----------------------------------------------------------------------------- +IdList *MdlVisitor::VisitGroupList(GroupListCtx *ctx) { + auto *names = new IdList; + for (auto *name_ctx : ctx->ident()) + names->push_back(VisitIdent(name_ctx)); + int index = 0; + for (auto *name : *names) + name->set_index(index++); + return names; +} + +//----------------------------------------------------------------------------- +// Process a list of base templates. 
+// base_list: (':' ident)* ; +//----------------------------------------------------------------------------- +IdList *MdlVisitor::VisitBaseList(BaseListCtx *ctx) { + if (ctx == nullptr) + return nullptr; + + auto *names = new IdList; + for (auto *name_ctx : ctx->ident()) + names->push_back(VisitIdent(name_ctx)); + return names; +} + +//----------------------------------------------------------------------------- +// Process a list of subunit bases, which can be subunit names or strings +// representing regular expressions of instruction names. +// su_base_list: (':' (ident|STRING_LITERAL))* ; +//----------------------------------------------------------------------------- +void MdlVisitor::VisitSuBaseList(SuBaseListCtx *ctx, IdList *&bases, + StringList *®ex) { + if (ctx == nullptr) + return; + + if (ctx->unit != nullptr) { + bases = new IdList; + for (auto *base_ctx : ctx->ident()) + bases->push_back(VisitIdent(base_ctx)); + } + + if (ctx->regex != nullptr) { + regex = new StringList; + for (auto *item : ctx->STRING_LITERAL()) { + auto expr = item->getText(); + regex->push_back(expr.substr(1, expr.length() - 2)); + } + } +} + +//----------------------------------------------------------------------------- +// Process a register_def rule: +// register_def : REGISTER register_decl (',' register_decl)* ';' ; +// A register declaration can declare a set of registers, each of which +// can be a set of registers. Expand them all to a single vector, and return it. +//----------------------------------------------------------------------------- +RegisterDefList *MdlVisitor::VisitRegisterDef(RegisterDefCtx *ctx) { + auto *regs = new RegisterDefList; + + // Each register declaration can return a list, so append them together. + for (auto *reg_ctx : ctx->register_decl()) { + RegisterDefList *regset = VisitRegisterDecl(reg_ctx); + regs->insert(regs->end(), regset->begin(), regset->end()); + } + return regs; +} + +//----------------------------------------------------------------------------- +// Process a register_decl rule: +// register_decl: ident | ident '[' range ']' ; +// If a range of registers is specified, expand to a list. +//----------------------------------------------------------------------------- +RegisterDefList *MdlVisitor::VisitRegisterDecl(RegisterDeclCtx *ctx) { + auto *regs = new RegisterDefList; + Identifier *reg = VisitIdent(ctx->ident()); + MdlItem item(ctx, current_file_name()); + + // If no range was specified, just return a single register. + if (ctx->range() == nullptr) { + regs->push_back(new RegisterDef(item, reg)); + return regs; // return the single definition in a vector. + } + + // If a range was specified, create a vector of register names. + for (int id = ctx->range()->first->value; id <= ctx->range()->last->value; + id++) { + auto *def = new Identifier(item, reg->name() + std::to_string(id)); + regs->push_back(new RegisterDef(item, def)); + } + + return regs; // return the list of names. +} + +//----------------------------------------------------------------------------- +// Process a register_class rule: +// register_class : REGCLASS { register_decl (',' register_decl)* '}' ';' ; +// A register class declaration creates a collection of register definitions. 
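+// Illustrative example (class and register names are placeholders; keyword
+// spelling follows mdl.g4):
+//   regclass GPR { r[0..31], sp, lr };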
+//-----------------------------------------------------------------------------
+RegisterClass *MdlVisitor::VisitRegisterClass(RegisterClassCtx *ctx) {
+  auto *regs = new RegisterDefList;
+  Identifier *name = VisitIdent(ctx->ident());
+
+  // Each register declaration can return a list, so append them together.
+  for (auto *reg_ctx : ctx->register_decl()) {
+    RegisterDefList *regset = VisitRegisterDecl(reg_ctx);
+    regs->insert(regs->end(), regset->begin(), regset->end());
+  }
+
+  MdlItem item(ctx, current_file_name());
+  return new RegisterClass(item, name, regs);
+}
+
+//-----------------------------------------------------------------------------
+// Process an ident rule:
+//    ident : IDENT ;
+// Every identifier creates an Identifier object, with context.
+//-----------------------------------------------------------------------------
+Identifier *MdlVisitor::VisitIdent(IdentCtx *ctx) {
+  if (ctx == nullptr)
+    return nullptr;
+
+  MdlItem item(ctx, current_file_name());
+  return new Identifier(item, ctx->getText());
+}
+
+//-----------------------------------------------------------------------------
+// Process an instruction definition rule:
+//    instruction_def : INSTRUCT name=ident
+//                        '(' operand_decl (',' operand_decl)* ')'
+//                        '{' (SUBUNIT '(' subunit=ident ')' ';')?
+//                            (DERIVED '(' derived=ident ')' ';')? '}'
+//-----------------------------------------------------------------------------
+const int kOpndNameRequired = -1;
+
+InstructionDef *MdlVisitor::VisitInstructionDef(InstructionDefCtx *ctx) {
+  auto *operands = new OperandDeclList;
+
+  for (auto *opnd_ctx : ctx->operand_decl()) {
+    auto *operand = VisitOperandDecl(opnd_ctx, kOpndNameRequired);
+    // Handle ellipsis operands. For now we don't do anything with these.
+    // TODO(tbd): Figure out how to handle ellipses in the back-end.
+    if (!operand->is_ellipsis())
+      operands->push_back(operand);
+    else if (opnd_ctx != ctx->operand_decl().back())
+      spec().ErrorLog(operand, "Ellipsis must be last declared operand");
+  }
+
+  auto *name = VisitIdent(ctx->name);
+  auto *subunit = VisitNameList(ctx->subunit);
+  auto *derived = VisitNameList(ctx->derived);
+
+  MdlItem item(ctx, current_file_name());
+  return new InstructionDef(item, name, operands, subunit, derived);
+}
+
+//-----------------------------------------------------------------------------
+// Process an operand declaration rule (used in instructions and operands):
+//    operand_decl : type=ident (name=ident)?
+//-----------------------------------------------------------------------------
+OperandDecl *MdlVisitor::VisitOperandDecl(OperandDeclCtx *ctx, int opnd_id) {
+  auto *type = VisitIdent(ctx->type);
+  auto *name = VisitIdent(ctx->name);
+  bool is_ellipsis = ctx->ellipsis != nullptr;
+  bool is_input = ctx->input != nullptr;
+  bool is_output = ctx->output != nullptr;
+  MdlItem item(ctx, current_file_name());
+
+  // If an operand name is not provided, we synthesize a name based
+  // on the component index.
+  if (name == nullptr && !is_ellipsis && opnd_id != kOpndNameRequired)
+    name = new Identifier(item, std::to_string(opnd_id));
+  if (name == nullptr)
+    name = new Identifier(item, "");
+
+  // We only allow ellipsis operands for instructions.
+ if (is_ellipsis && opnd_id != kOpndNameRequired) { + spec().ErrorLog(&item, "Ellipsis not allowed in operand definitions"); + name = type = new Identifier(item, "..."); + } + return new OperandDecl(item, type, name, is_ellipsis, is_input, is_output); +} + +//----------------------------------------------------------------------------- +// Process an operand definition rule: +// operand_def : OPERAND name=ident '(' (operand_decl (',' operand_decl)*)? '); +// '{' (operand_type | operand_attribute)* '}' ';'? +//----------------------------------------------------------------------------- +OperandDef *MdlVisitor::VisitOperandDef(OperandDefCtx *ctx) { + auto *operands = new OperandDeclList; + Identifier *type = nullptr; + auto *attributes = new OperandAttributeList; + auto *name = VisitIdent(ctx->name); + MdlItem item(ctx, current_file_name()); + + int opnd_id = 0; + for (auto *opnd_ctx : ctx->operand_decl()) + operands->push_back(VisitOperandDecl(opnd_ctx, opnd_id++)); + + if (!ctx->operand_type().empty()) { + if (ctx->operand_type().size() != 1) + spec().ErrorLog(&item, "Only one type specification allowed"); + else + type = VisitIdent(ctx->operand_type()[0]->type); + } + + for (auto *attribute_ctx : ctx->operand_attribute()) { + auto *attr = VisitOperandAttribute(attribute_ctx); + attributes->insert(attributes->end(), attr->begin(), attr->end()); + } + + return new OperandDef(item, name, operands, type, attributes, nullptr); +} + +//----------------------------------------------------------------------------- +// Process a derived operand definition rule: +// derived_operand_def : OPERAND name=ident ':' base=ident +// '{' (operand_type | operand_attribute)* '}' ';'? +//----------------------------------------------------------------------------- +OperandDef *MdlVisitor::VisitDerivedOperandDef(DerivedOperandDefCtx *ctx) { + auto *operands = new OperandDeclList; // always empty, by definition + Identifier *type = nullptr; + auto *attributes = new OperandAttributeList; + auto *name = VisitIdent(ctx->name); + auto *bases = VisitBaseList(ctx->base_list()); + MdlItem item(ctx, current_file_name()); + + if (!ctx->operand_type().empty()) { + if (ctx->operand_type().size() != 1) + spec().ErrorLog(&item, "Only one type specification allowed"); + else + type = VisitIdent(ctx->operand_type()[0]->type); + } + + for (auto *attribute_ctx : ctx->operand_attribute()) { + auto *attr = VisitOperandAttribute(attribute_ctx); + attributes->insert(attributes->end(), attr->begin(), attr->end()); + } + + return new OperandDef(item, name, operands, type, attributes, bases); +} + +//----------------------------------------------------------------------------- +// Process an operand attribute definition rule: +// operand_attribute : +// (predicate=name_list ':')? operand_attribute_stmt +// | predicate=name_list ':' '{' operand_attribute_stmt* '}' ';'? 
+//-----------------------------------------------------------------------------
+OperandAttributeList *
+MdlVisitor::VisitOperandAttribute(OperandAttributeCtx *ctx) {
+  auto *predicate = VisitNameList(ctx->predicate);
+  auto *attributes = new OperandAttributeList;
+
+  for (auto *stmt : ctx->operand_attribute_stmt())
+    attributes->push_back(VisitOperandAttributeStmt(stmt, predicate));
+
+  return attributes;
+}
+
+//-----------------------------------------------------------------------------
+// Process an operand attribute definition rule:
+//    operand_attribute_stmt :
+//        ATTRIBUTE ident '=' snumber
+//            (IF type '[' pred_value (',' pred_value)* ']')? ';'
+//-----------------------------------------------------------------------------
+OperandAttribute *
+MdlVisitor::VisitOperandAttributeStmt(OperandAttributeStmtCtx *ctx,
+                                      IdList *predicate) {
+  auto *name = VisitIdent(ctx->name);
+  std::vector<int> *values;
+  MdlItem item(ctx, current_file_name());
+
+  // Process either a single value, or a tuple of values.
+  if (ctx->value) {
+    values = new std::vector<int>;
+    values->push_back(ctx->snumber()->value);
+  } else {
+    values = VisitTuple(ctx->values);
+  }
+
+  std::string type = ctx->type ? ctx->type->IDENT()->getText() : "";
+  if (!type.empty() && type != "label" && type != "address" && type != "lit")
+    spec().ErrorLog(&item, "Invalid predicate type: {0}", type);
+
+  auto *pred_values = new PredValueList;
+
+  for (auto *pred_value_ctx : ctx->pred_value())
+    pred_values->push_back(VisitPredValue(pred_value_ctx));
+
+  return new OperandAttribute(item, name, values, type, pred_values, predicate);
+}
+
+//-----------------------------------------------------------------------------
+// Process a tuple:   tuple: '[' snumber (',' snumber)* ']'
+//-----------------------------------------------------------------------------
+std::vector<int> *MdlVisitor::VisitTuple(TupleCtx *ctx) {
+  auto values = new std::vector<int>;
+  for (auto *snumber : ctx->snumber())
+    values->push_back(snumber->value);
+  return values;
+}
+
+//-----------------------------------------------------------------------------
+// Process a predicate value:
+//    value=snumber | low=snumber '..' high=snumber | '{' mask=number '}'
+//-----------------------------------------------------------------------------
+PredValue *MdlVisitor::VisitPredValue(PredValueCtx *ctx) {
+  MdlItem item(ctx, current_file_name());
+  if (ctx->value)
+    return new PredValue(item, ctx->value->value, ctx->value->value);
+
+  if (ctx->mask)
+    return new PredValue(item, ctx->mask->value);
+
+  if (ctx->low->value <= ctx->high->value)
+    return new PredValue(item, ctx->low->value, ctx->high->value);
+
+  spec().ErrorLog(&item, "Invalid value range: {0}..{1}", ctx->low->value,
+                  ctx->high->value);
+  return new PredValue(item, 0, 0); // dummy value
+}
+
+//-----------------------------------------------------------------------------
+// Convert a predicate expression string to an internal expression type.
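+// Recognized operator names (kCheckAny, kCheckAll, etc.) map to their PredOp
+// enumerators; any other name maps to PredOp::kEmpty.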
+//-----------------------------------------------------------------------------
+static PredOp NameToOp(std::string name) {
+  static auto *predicate_ops = new std::unordered_map<std::string, PredOp>(
+      {{kTrue, PredOp::kTrue},
+       {kFalse, PredOp::kFalse},
+       {kCheckAny, PredOp::kCheckAny},
+       {kCheckAll, PredOp::kCheckAll},
+       {kCheckNot, PredOp::kCheckNot},
+       {kCheckOpcode, PredOp::kCheckOpcode},
+       {kCheckIsRegOperand, PredOp::kCheckIsRegOperand},
+       {kCheckIsImmOperand, PredOp::kCheckIsImmOperand},
+       {kCheckZeroOperand, PredOp::kCheckZeroOperand},
+       {kCheckFunctionPredicate, PredOp::kCheckFunctionPredicate},
+       {kCheckFunctionPredicateWithTII, PredOp::kCheckFunctionPredicateWithTII},
+       {kCheckNumOperands, PredOp::kCheckNumOperands},
+       {kCheckRegOperand, PredOp::kCheckRegOperand},
+       {kCheckInvalidRegOperand, PredOp::kCheckInvalidRegOperand},
+       {kCheckImmOperand, PredOp::kCheckImmOperand},
+       {kCheckSameRegOperand, PredOp::kCheckSameRegOperand},
+       {kOpcodeSwitchStmt, PredOp::kOpcodeSwitchStmt},
+       {kOpcodeSwitchCase, PredOp::kOpcodeSwitchCase},
+       {kReturnStatement, PredOp::kReturnStatement}});
+
+  if (predicate_ops->count(name))
+    return (*predicate_ops)[name];
+  return PredOp::kEmpty;
+}
+
+//-----------------------------------------------------------------------------
+// Process a predicate definition:
+//    predicate_def : PREDICATE ident ':' predicate_op? ';'
+//-----------------------------------------------------------------------------
+void MdlVisitor::VisitPredicateDef(PredicateDefCtx *ctx, MdlSpec &spec) {
+  auto name = ctx->ident()->getText();
+  PredicateOpCtx *def = ctx->predicate_op();
+
+  // Handle predicate definitions that don't have a predicate expression.
+  // (Mostly this is handling "TruePred" and "FalsePred").
+  if (def == nullptr) {
+    MdlItem item(ctx, current_file_name());
+    spec.EnterPredicate(name, new PredExpr(item, NameToOp(name)));
+    return;
+  }
+
+  // If we see a predicate expression, add it to the table.
+  spec.EnterPredicate(name, VisitPredicateOp(def));
+}
+
+//-----------------------------------------------------------------------------
+// Process a predicate expression:
+//    predicate_op : pred_opcode '<' pred_opnd (',' pred_opnd)* '>'
+//                 | CODE_ESCAPE | ident
+//-----------------------------------------------------------------------------
+PredExpr *MdlVisitor::VisitPredicateOp(PredicateOpCtx *ctx) {
+  MdlItem item(ctx, current_file_name());
+
+  // Handle the easy ones first: names and code escapes.
+  if (ctx->ident()) {
+    auto expr = NameToOp(ctx->ident()->getText());
+    if (expr != PredOp::kEmpty)
+      return new PredExpr(item, expr);
+    return new PredExpr(item, PredOp::kName, ctx->ident()->getText());
+  }
+
+  if (ctx->code_escape())
+    return new PredExpr(item, PredOp::kCode, ctx->code_escape()->getText());
+
+  // If the definition was empty, just return.
+  if (ctx->pred_opcode() == nullptr)
+    return nullptr;
+
+  // Create an internal representation of a predicate expression.
+  auto opcode = NameToOp(ctx->pred_opcode()->getText());
+  std::vector<PredExpr *> opnds;
+  for (auto *opnd : ctx->pred_opnd())
+    opnds.push_back(VisitPredicateOpnd(opnd));
+
+  // Error check the number of operands.
+ unsigned min_opnds, max_opnds; + switch (opcode) { + default: + case PredOp::kCheckOpcode: + case PredOp::kOpcodeSwitchStmt: + case PredOp::kCheckAll: + case PredOp::kCheckAny: + return new PredExpr(item, opcode, opnds); + + case PredOp::kReturnStatement: + case PredOp::kCheckNot: + case PredOp::kCheckIsRegOperand: + case PredOp::kCheckIsImmOperand: + case PredOp::kCheckInvalidRegOperand: + case PredOp::kCheckZeroOperand: + case PredOp::kCheckNumOperands: + min_opnds = max_opnds = 1; + break; + + case PredOp::kCheckFunctionPredicate: + min_opnds = 2; + max_opnds = 2; + break; + case PredOp::kCheckFunctionPredicateWithTII: + min_opnds = 2; + max_opnds = 3; // TII operand is optional + break; + + case PredOp::kCheckRegOperand: + min_opnds = 2; + max_opnds = 3; + break; + case PredOp::kCheckImmOperand: + min_opnds = 1; + max_opnds = 3; + break; + + case PredOp::kOpcodeSwitchCase: + min_opnds = max_opnds = 2; + break; + } + + if (opnds.size() < min_opnds) + spec().ErrorLog(&item, "Missing operands: ({0} expected)", min_opnds); + if (opnds.size() > max_opnds) + spec().ErrorLog(&item, "Extra operands: ({0} expected)", max_opnds); + + return new PredExpr(item, opcode, opnds); +} + +//----------------------------------------------------------------------------- +// Process a predicate operand: +// pred_opnd : name=ident | snumber | STRING_LITERAL | predicate_op | +// '[' opcode=ident (',' ident)* ']' | operand +//----------------------------------------------------------------------------- +PredExpr *MdlVisitor::VisitPredicateOpnd(PredicateOpndCtx *ctx) { + MdlItem item(ctx, current_file_name()); + if (ctx->name) + return new PredExpr(item, PredOp::kName, ctx->name->getText()); + if (ctx->operand()) + return new PredExpr(item, PredOp::kOperandRef, + VisitOperand(ctx->operand())); + if (ctx->STRING_LITERAL()) + return new PredExpr(item, PredOp::kString, + ctx->STRING_LITERAL()->getText()); + + if (ctx->snumber()) + return new PredExpr(item, PredOp::kNumber, ctx->snumber()->getText()); + + if (ctx->predicate_op()) + return VisitPredicateOp(ctx->predicate_op()); + + if (ctx->opcode_list) { + std::vector opcodes; + for (auto opcode : ctx->ident()) + opcodes.push_back(new PredExpr(item, PredOp::kString, opcode->getText())); + return new PredExpr(item, PredOp::kCheckOpcode, opcodes); + } + + return nullptr; +} + +} // namespace mdl +} // namespace mpact diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -1848,9 +1848,10 @@ << " const MCWriteProcResEntry *WPR,\n" << " const MCWriteLatencyEntry *WL,\n" << " const MCReadAdvanceEntry *RA, const InstrStage *IS,\n" - << " const unsigned *OC, const unsigned *FP) :\n" + << " const unsigned *OC, const unsigned *FP,\n" + << " mdl::CpuTableDef *MDL) :\n" << " MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD,\n" - << " WPR, WL, RA, IS, OC, FP) { }\n\n" + << " WPR, WL, RA, IS, OC, FP, MDL) { }\n\n" << " unsigned resolveVariantSchedClass(unsigned SchedClass,\n" << " const MCInst *MI, const MCInstrInfo *MCII,\n" << " unsigned CPUID) const override {\n" @@ -1925,7 +1926,8 @@ OS << "\nstatic inline MCSubtargetInfo *create" << Target << "MCSubtargetInfoImpl(" - << "const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS) {\n"; + << "const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,\n" + << " mdl::CpuTableDef *CpuTable) {\n"; OS << " return new " << Target << "GenMCSubtargetInfo(TT, CPU, TuneCPU, FS, "; if 
(NumFeatures) @@ -1947,6 +1949,7 @@ << Target << "ForwardingPaths"; } else OS << "nullptr, nullptr, nullptr"; + OS << ", CpuTable"; OS << ");\n}\n\n"; OS << "} // end namespace llvm\n\n"; @@ -1975,7 +1978,7 @@ << "} // end namespace " << Target << "_MC\n\n"; OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n" << " explicit " << ClassName << "(const Triple &TT, StringRef CPU, " - << "StringRef TuneCPU, StringRef FS);\n" + << "StringRef TuneCPU, StringRef FS, const mdl::CpuTableDef *CpuTable);\n" << "public:\n" << " unsigned resolveSchedClass(unsigned SchedClass, " << " const MachineInstr *DefMI," @@ -2019,7 +2022,7 @@ } OS << ClassName << "::" << ClassName << "(const Triple &TT, StringRef CPU, " - << "StringRef TuneCPU, StringRef FS)\n" + << "StringRef TuneCPU, StringRef FS, const mdl::CpuTableDef *CpuTable)\n" << " : TargetSubtargetInfo(TT, CPU, TuneCPU, FS, "; if (NumFeatures) OS << "ArrayRef(" << Target << "FeatureKV, " << NumFeatures << "), "; @@ -2040,6 +2043,7 @@ << Target << "ForwardingPaths"; } else OS << "nullptr, nullptr, nullptr"; + OS << ", CpuTable"; OS << ") {}\n\n"; EmitSchedModelHelpers(ClassName, OS); diff --git a/llvm/utils/TdScan/CMakeLists.txt b/llvm/utils/TdScan/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/utils/TdScan/CMakeLists.txt @@ -0,0 +1,4 @@ +# Cmake for building as part of LLVM +set(LLVM_LINK_COMPONENTS Support) +add_llvm_utility(tdscan scan.cpp arch.cpp debug.cpp register.cpp output.cpp) + diff --git a/llvm/utils/TdScan/arch.cpp b/llvm/utils/TdScan/arch.cpp new file mode 100644 --- /dev/null +++ b/llvm/utils/TdScan/arch.cpp @@ -0,0 +1,911 @@ +//===- arch.cpp - Extract architecture model from tablegen records -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Extract the MDL model from the information scraped from TableGen. +// +// We use ProcessorModel, SchedModel, ProcResource, SchedReadWrite, and +// InstRW records to build an internal representation of an MDL machine model. +// If Itineraries are present, we also process FuncUnit, ProcessorItinerary, +// InstrItinData, InstrStage, and Bypass objects to build the internal model +// for instruction bundling. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "scan.h" +#include "llvm/Support/Regex.h" + +namespace mpact { +namespace mdl { +namespace scan { + +//------------------------------------------------------------------------- +// Annotate functional units which have resource groups with a full set of +// the members of the resource group. We assume that any resource can be +// a resource group, so we need to recur on each member of a group. +//------------------------------------------------------------------------- +void MachineDescription::PopulateResourceGroup(ProcResource *func) { + // If a functional unit doesn't have a resource group, just add its name + // to its (otherwise empty) group. + if (!func->is_resource_group()) { + func->resource_group().insert(func->name()); + return; + } + // If we've already processed this object, return. 
+ if (!func->resource_group().empty()) + return; + + for (const auto &item : func->group_names()) { + PopulateResourceGroup(proc_resources_[item]); + auto &members = proc_resources_[item]->resource_group(); + func->resource_group().insert(members.begin(), members.end()); + } +} + +//------------------------------------------------------------------------- +// Populate CPUs with Functional units and issue width. +//------------------------------------------------------------------------- +void MachineDescription::PopulateCPUsWithFUs() { + // Annotate SchedModels with cpu definitions that use them. + for (const auto &[name, cpu] : cpus_) + sched_models_[cpu->sched_model()]->add_cpu(name); + + // Expand ProcResourceGroups for each functional unit. + for (const auto &[name, unit] : proc_resources_) + PopulateResourceGroup(unit); + + // If a func_unit has a super unit, add that functional unit to the super + // unit as a subunit. If a functional unit has a SchedModel and not a + // Super unit, add it to that SchedModel's list of functional units. + for (auto &[name, unit] : proc_resources_) { + if (!unit->fu_super().empty()) { + proc_resources_[unit->fu_super()]->add_child_unit(unit->name()); + } else if (!unit->sched_model().empty()) { + for (const auto &item : unit->resource_group()) + if (proc_resources_[item]->fu_super().empty()) + sched_models_[unit->sched_model()]->add_func_unit(item); + } + } + + // For every SchedWrite object, add its ProcResources to its SchedModel. + for (const auto &[name, rw_unit] : rw_units_) + if (rw_unit->is_write()) + for (auto &[model, sched_model] : rw_unit->sched_model_info()) + for (auto &resource : sched_model.func_units()) + if (proc_resources_[resource]->fu_super().empty()) + for (const auto &unit : proc_resources_[resource]->resource_group()) + if (proc_resources_[unit]->fu_super().empty()) + sched_models_[model]->add_func_unit(unit); + + // For each functional unit instance, create a set of super-unit names. + for (auto &[name, res] : proc_resources()) + CreateSuperUnitNames(name); +} + +// A "Super" unit is a ProcResource (ie functional unit in MDL) which is +// named by other ProcResources as a "Super" unit, forming a tree-like +// hierarchy of ProcResources through "Super" and "child" links. Each +// ProcResource has one or more "instances", and an arbitrary set of +// children - which also have arbitrary instances and children. A ProcResource +// can have only a single Super unit, so the hierarchy forms a tree (and +// importantly, not a DAG nor a general DFG). +// +// The instances of a top-level Super ProcResource model a set of parallel +// execution streams. Its child units must be (recursively) allocated across +// its instances. Each instance of a ProcResource is a collection of +// functional units that are associated with the ProcResource's execution +// stream. In the MDL, this is modeled as a set of functional units each with +// a set of base units. +// +// So briefly, the algorithm does a depth-first walk of each ProcResource tree, +// and allocates children instances to parent instances. In the end, each +// top-level Super ProcResource has a set of functional unit template names +// (usually derived templates: a:b:c:d) correspoinding to each of its instances. 
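+// For example (hypothetical), a Super unit S with two instances and child
+// units A (two instances) and B (one instance) ends up with per-instance
+// base-unit name lists S:A:B and S:A.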
+ProcResource *
+MachineDescription::CreateSuperUnitNames(const std::string &unit_name) {
+  auto *super = proc_resources_[unit_name];
+  auto unit = unit_name;
+  if (super->buffer_size() > -1)
+    unit += formatv("<{0}>", super->buffer_size());
+
+  // No need to process a functional unit more than once - we don't necessarily
+  // process them in top-down order.
+  if (!super->super_names().empty())
+    return super;
+
+  // First initialize all the instances of the Super FU with its own name.
+  for (int i = 0; i < super->num_instances(); i++)
+    super->super_names().emplace_back(unit);
+
+#if 0
+  // Note: this is clever, but when the bases have buffers, we can't express
+  // this in a functional unit /template/, so we can carry more information if
+  // we just define the instance as having bases.  It might make sense to
+  // create a template if the processor doesn't have a reorder queue, but
+  // it's purely a cosmetic difference even in that case.
+
+  // If there's only one instance of this unit, just add all of its child
+  // units to its base.
+  if (super->num_instances() == 1) {
+    for (auto &base_name : super->child_func_units()) {
+      for (auto &name : CreateSuperUnitNames(base_name)->super_names())
+        super->add_base_unit(name.name());
+    }
+    return super;
+  }
+#endif
+
+  // Recur over each child unit, then allocate each child's instances to this
+  // ProcResource's instances such that the child instances are allocated
+  // evenly across the least congested parent instances.  After allocating all
+  // of a child's instances, sort the parent's instances by the number of
+  // children allocated to them.
+  for (auto &base_name : super->child_func_units()) {
+    auto *base = CreateSuperUnitNames(base_name);
+    int super_sz = super->num_instances();
+    int base_sz = base->num_instances();
+    int count = std::min(super_sz, base_sz);
+    for (int first = 0; first < count; first += count)
+      for (int i = first; i < count + first; i++)
+        super->super_names()[i].add_name(base->super_names()[i].name());
+
+    if (base_sz != super_sz)
+      std::sort(super->super_names().begin(), super->super_names().end());
+  }
+  return super;
+}
+
+//-------------------------------------------------------------------------
+// Add a new SchedModel to the set of information about the target.
+//-------------------------------------------------------------------------
+void SchedReadWrite::AddSchedModel(const std::string &sched_model,
+                                   const std::vector<std::string> &func_units,
+                                   const std::vector<std::string> &res_cycles,
+                                   const std::vector<std::string> &start_cycles,
+                                   const std::string &latency,
+                                   const std::string &micro_ops, bool is_write,
+                                   bool is_begin_group, bool is_end_group,
+                                   bool is_single_issue, bool retire_ooo) {
+  auto model = FixAttribute(sched_model);
+
+  assert(!sched_model_info_.count(model) &&
+         formatv("Duplicate SchedModel:{0}", model).c_str());
+
+  auto &info = sched_model_info()[model];
+  info.latency = std::stoi(latency);
+  info.micro_ops = std::stoi(micro_ops);
+  info.is_begin_group = is_begin_group;
+  info.is_end_group = is_end_group;
+  info.is_single_issue = is_single_issue;
+  info.retire_ooo = retire_ooo;
+  info.func_units() = func_units;
+  for (auto &cycle : res_cycles)
+    info.res_cycles.push_back(std::stoi(cycle));
+  for (auto &cycle : start_cycles)
+    info.start_cycles.push_back(std::stoi(cycle));
+}
+
+//-------------------------------------------------------------------------
+// Add a new SchedReadWrite to the set of information about the target.
+//-------------------------------------------------------------------------
+void MachineDescription::AddSchedReadWrite(
+    const std::string &name, const std::string &sched_model,
+    const std::vector<std::string> &func_units,
+    const std::vector<std::string> &res_cycles,
+    const std::vector<std::string> &start_cycles, const std::string &latency,
+    const std::string &micro_ops, bool is_write, bool is_begin_group,
+    bool is_end_group, bool is_single_issue, bool retire_ooo) {
+  if (!rw_units_.count(name))
+    rw_units_[name] = new SchedReadWrite(
+        name, sched_model, func_units, res_cycles, start_cycles, latency,
+        micro_ops, is_write, is_begin_group, is_end_group, is_single_issue,
+        retire_ooo);
+  else
+    rw_units_[name]->AddSchedModel(
+        sched_model, func_units, res_cycles, start_cycles, latency, micro_ops,
+        is_write, is_begin_group, is_end_group, is_single_issue, retire_ooo);
+}
+
+//-------------------------------------------------------------------------
+// Return the operand type.
+//-------------------------------------------------------------------------
+std::string MachineDescription::GetOperandType(const std::string &opnd) {
+  std::size_t pos = opnd.find(' ');
+  return opnd.substr(0, pos);
+}
+
+//-------------------------------------------------------------------------
+// Update each instruction's implicit input & output operands with those of
+// its base instruction.
+//-------------------------------------------------------------------------
+void MachineDescription::AddImplicitOperands() {
+  for (const auto &[name, instruct] : instructions_) {
+    auto &uses = instruct->implicit_uses_;
+    auto &defs = instruct->implicit_defs_;
+    auto base_instr_name = instruct->base_instr_;
+
+    if (!base_instr_name.empty()) {
+      auto base_instr = instructions_[base_instr_name];
+      auto base_uses = base_instr->implicit_uses_;
+      auto base_defs = base_instr->implicit_defs_;
+
+      uses.insert(uses.end(), base_uses.begin(), base_uses.end());
+      defs.insert(defs.end(), base_defs.begin(), base_defs.end());
+
+      assert(base_instr->base_instr_.empty() &&
+             "Unexpected multiple inheritance of base instr");
+    }
+  }
+}
+
+//-------------------------------------------------------------------------
+// Find operands that are referenced by instructions (or other operands).
+//-------------------------------------------------------------------------
+void MachineDescription::FindReferencedOperands() {
+  for (const auto &[name, instruct] : instructions_)
+    if (instruct->HasSubunits() || ignore_subunits()) {
+      for (auto &out : instruct->outs_) {
+        std::string op = GetOperandType(out);
+        if (operands_.count(op)) {
+          operands_[op]->set_referenced();
+        } else if (register_class_list_.count(op)) {
+          register_class_list_[op]->set_referenced();
+        } else if (pointer_like_reg_class_.count(op)) {
+          register_class_list_[op] = new RegisterClass(op, nullptr);
+          register_class_list_[op]->set_referenced();
+        } else if (op != "...") {
+          std::cerr << "Unrecognized operand type:" << op << "\n";
+        }
+      }
+      for (auto &in : instruct->ins_) {
+        std::string op = GetOperandType(in);
+        if (operands_.count(op)) {
+          operands_[op]->set_referenced();
+        } else if (register_class_list_.count(op)) {
+          register_class_list_[op]->set_referenced();
+        } else if (pointer_like_reg_class_.count(op)) {
+          register_class_list_[op] = new RegisterClass(op, nullptr);
+          register_class_list_[op]->set_referenced();
+        } else if (op != "...") {
+          std::cerr << "Unrecognized operand type:" << op << "\n";
+        }
+      }
+    }
+
+  // Annotate operands and register classes that are referenced from
+  // referenced operands.
If a name is not found in the operand types set, + // check the register class set. If it's not there, check the value_type + // set. If found there, create an operand for it. + bool change = true; + while (change) { + change = false; + for (const auto &[name, operand] : operands_) + for (auto &opnd : operand->ops()) { + std::string op = GetOperandType(opnd); + if (operands_.count(op) && !operands_[op]->referenced()) { + operands_[op]->set_referenced(); + change = true; + } else if (register_class_list_.count(op)) { + register_class_list_[op]->set_referenced(); + } else if (pointer_like_reg_class_.count(op)) { + register_class_list_[op] = new RegisterClass(op, nullptr); + register_class_list_[op]->set_referenced(); + } else if (value_type_set_.count(op)) { + std::vector ops; + operands_[op] = new Operand(op, ops, op); + operands_[op]->set_referenced(); + } + } + } +} + +bool MachineDescription::IsReadUnit(const std::string &name, + const std::string &model) { + if (auto *res = GetSchedReadWrite(name, model)) + return res->is_read(); + if (IsSchedVariant(name)) + return sched_variants_[name]->is_read(); + return false; +} + +// To find the functional units associated with a read, find the set of +// functional units associated with its "valid writes". +void MachineDescription::FindReadUnits(const std::string &name, + const std::string &model, + ForwardUnits &units, + const std::string &predicate) { + if (auto *read = GetSchedReadWrite(name, model)) + if (auto *read_ref = read->HasModel(model)) + for (auto &write : read_ref->valid_writes()) + FindWriteUnits(write, model, units, read_ref->latency, predicate); + + if (IsSchedVariant(name)) + for (auto &variant : sched_variants_[name]->variants()) + for (auto &read_ref : sched_vars_[variant]->selected()) + FindReadUnits(read_ref, model, units, + sched_vars_[variant]->predicate()); +} + +// Find the set of functional units based on the instructions write units. +void MachineDescription::FindWriteUnits(const std::string &name, + const std::string &model, + ForwardUnits &units, int latency, + const std::string &predicate) { + if (auto *res = GetSchedReadWrite(name, model)) + if (auto *write_ref = res->HasModel(model)) + for (auto &fu : write_ref->func_units()) + units.emplace(fu, latency, predicate); + + if (IsSchedVariant(name)) + for (auto &variant : sched_variants_[name]->variants()) + for (auto &write_ref : sched_vars_[variant]->selected()) + FindWriteUnits(write_ref, model, units, latency, predicate); +} + +// Collect forwarding information for each InstRW record. +void MachineDescription::ExtractForwardingInfo(InstRW *instrw) { + if (!gen_forwarding_info()) + return; + + auto const &model = instrw->sched_model(); + + // Accumulate the set of functional units for this InstRW record. + // Scan the resource writes and note the FUs used by each of them. + ForwardUnits write_units; + for (const auto &res_name : instrw->rw_units()) + if (!IsReadUnit(res_name, model)) + FindWriteUnits(res_name, model, write_units, 0, ""); + + // Add each write unit to the set of functional units for this instruction. + for (const auto &[unit, latency, pred] : write_units) + instrw->func_units().insert(unit); + + // Scan all the read units, and accumulate the set of functional units + // that forward for each read. 
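+  // Each (read, write) pair becomes a forwarding edge in the forwarding
+  // network, recorded with the index of the read within the InstRW's
+  // rw_units list.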
+  int res_id = 0;
+  for (const auto &res_name : instrw->rw_units()) {
+    if (IsReadUnit(res_name, model)) {
+      ForwardUnits fwd_units;
+      FindReadUnits(res_name, model, fwd_units, "");
+
+      for (auto &[to, ignore_lat, ignore_pred] : write_units)
+        for (auto &[from, latency, pred] : fwd_units) {
+          instrw->forwarding_info().emplace(res_id, latency, from, to, pred);
+          forwarding_network_.AddEdge(model, from, to, latency);
+        }
+    }
+    res_id++;
+  }
+}
+
+// Expand an instruction's SchedReadWrite list to a set of InstRW records.
+// The trick here is to find the model associated with the rw_unit.
+// The "model" parameter is optional - if a valid model is passed in, we only
+// generate InstRW for cases where the model in the InstRW matches the
+// passed-in model.
+// If we create a new InstRW, extract any forwarding information from it.
+void MachineDescription::GenerateInstRWs(
+    Instruction *instr, const std::vector<std::string> &rw_units,
+    const std::string &first, const std::string &model) {
+  if (rw_units.empty())
+    return;
+
+  const auto rw_name = !first.empty() ? first : rw_units[0];
+
+  // Check for aliases first, which typically have specific models they
+  // apply to.  We only add InstRWs for these if the model matches.  Since some
+  // rw_names are both aliases and rw_units, we fall through to the
+  // SchedReadWrite case after checking aliases.
+  if (IsSchedAliasMatch(rw_name)) {
+    for (const auto &[alias_model, name] : sched_alias_matches_[rw_name]) {
+      if (alias_model == model) {
+        auto *instrw = new InstRW("alias", rw_units, alias_model, {}, {});
+        ExtractForwardingInfo(instrw);
+        instr->add_inst_rw(instrw);
+      }
+    }
+  }
+
+  // If there's no model passed in, we generate InstRWs for every model
+  // associated with the rw_name.
+  if (IsSchedReadWrite(rw_name)) {
+    for (const auto &[rw_model, item] : rw_units_[rw_name]->sched_model_info())
+      if (model.empty() || model == rw_model) {
+        auto *instrw = new InstRW("rw", rw_units, rw_model, {}, {});
+        ExtractForwardingInfo(instrw);
+        instr->add_inst_rw(instrw);
+      }
+    return;
+  }
+
+  if (IsSchedVariant(rw_name)) {
+    const auto &var_model = sched_variants_[rw_name]->sched_model();
+    if (model.empty() || var_model == model) {
+      auto *instrw = new InstRW("var", rw_units, var_model, {}, {});
+      ExtractForwardingInfo(instrw);
+      instr->add_inst_rw(instrw);
+    }
+    return;
+  }
+
+  if (IsWriteSequence(rw_name)) {
+    const auto &seq_model = write_sequences_[rw_name]->sched_model();
+    const auto &first_unit = write_sequences_[rw_name]->writes()[0];
+    if (model.empty())
+      return GenerateInstRWs(instr, rw_units, first_unit, seq_model);
+
+    auto *instrw = new InstRW("dup", rw_units, seq_model, {}, {});
+    ExtractForwardingInfo(instrw);
+    instr->add_inst_rw(instrw);
+  }
+}
+
+// Given an instruction and either its "ins" or "outs" operand list, flatten
+// the list to include every component of each operand.
+std::vector<std::string> MachineDescription::FlattenInstrOperands(
+    Instruction *instr, const std::vector<std::string> &operands) {
+  std::vector<std::string> opnds;
+  for (const auto &opnd : operands) {
+    auto pos = opnd.find(" ");
+    if (pos != std::string::npos) {
+      std::string prefix = opnd.substr(0, pos);
+      if (IsOperand(prefix) && operands_[prefix]->ops().size() > 1) {
+        for (unsigned idx = 0; idx < operands_[prefix]->ops().size(); idx++)
+          opnds.push_back(formatv("{0}.{1}", opnd, idx));
+        continue;
+      }
+    }
+    opnds.push_back(opnd);
+  }
+  return opnds;
+}
+
+// Helper function to determine if a regular expression has a prefix that we
+// can search for.
+// Generally, this is anything up to the first metacharacter.
+// However, if the expression has a top-level | or ? operator, we can't
+// define a prefix.
+static std::string GetPrefix(std::string &regex) {
+  static const char meta[] = "()^$*+?.[]\\{}";
+  auto first_meta = regex.find_first_of(meta);
+  if (first_meta == std::string::npos)
+    return regex;
+
+  int param = 0;
+  for (char ch : regex) {
+    if (ch == '(')
+      param++;
+    else if (ch == ')')
+      param--;
+    else if ((ch == '|' || ch == '?') && param == 0)
+      return "";
+  }
+
+  return regex.substr(0, first_meta);
+}
+
+// An instruction definition might not directly specify its read/write
+// units; they can instead be specified indirectly with InstRW records, which
+// associate instructions (selected by regular expressions) with rw-units for
+// a particular scheduling model.
+void MachineDescription::ProcessInstRW() {
+  // Iterate over all the instregex entries in InstRW records, and expand
+  // them to lists of matched instruction names.  Cache the lists so we only
+  // match expressions once (there tend to be a lot of identical expressions).
+  std::map<std::string, std::vector<std::string>> regex_dict;
+
+  for (InstRW *instrw : instrw_info_) {
+    for (auto regex : instrw->instregex()) {
+      auto prefix = GetPrefix(regex);
+      auto pattern = regex.substr(prefix.size());
+
+      std::optional<llvm::Regex> rex;
+      if (!pattern.empty()) {
+        if (pattern[0] != '^')
+          pattern = formatv("^({0})", pattern);
+        rex = llvm::Regex(pattern);
+      }
+
+      auto end = instructions_.end();
+      auto begin = instructions_.begin();
+      if (!prefix.empty())
+        begin = instructions_.lower_bound(prefix);
+
+      if (!regex_dict.count(regex)) {
+        std::vector<std::string> matches;
+
+        if (prefix.empty()) {
+          for (auto itr = begin; itr != end; ++itr)
+            if (rex->match(itr->first))
+              matches.push_back(itr->first);
+        } else {
+          for (auto itr = begin; itr != end; ++itr) {
+            if (itr->first.compare(0, prefix.size(), prefix) != 0)
+              break;
+            if (!rex || rex->match(itr->first.substr(prefix.size())))
+              matches.push_back(itr->first);
+          }
+        }
+        regex_dict[regex] = matches;
+      }
+      auto &instrs = instrw->instrs();
+      auto &dict = regex_dict[regex];
+      instrs.insert(instrs.end(), dict.begin(), dict.end());
+    }
+  }
+
+  // Add each InstRW object to every instruction that uses it.
+  // Extract forwarding information from InstRW records.
+  for (InstRW *instrw : instrw_info_) {
+    ExtractForwardingInfo(instrw);
+    for (const auto &instr : instrw->instrs())
+      if (instructions_.count(instr))
+        instructions_[instr]->add_inst_rw(instrw);
+  }
+
+  // If an instruction has SchedRW objects, create InstRW records for it.
+  for (auto &[name, instr] : instructions_)
+    GenerateInstRWs(instr, instr->rw_units(), "", "");
+
+  //--------------------------------------------------------------------------
+  // Instructions' "ins" and "outs" lists specify the instruction's declared
+  // operands and operand types.  The instructions' SchedRW attributes
+  // provide a set of Read or Write resources that are associated with each
+  // component of an operand.  Thus, we need to flatten the instruction's
+  // ins and outs lists to describe each operand component, rather than just
+  // the declared operand.
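+  // For example (hypothetical), if operand type "addrmode" has two
+  // components, a declared operand "addrmode addr" is flattened to
+  // "addrmode addr.0" and "addrmode addr.1".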
+ //-------------------------------------------------------------------------- + for (auto &[name, instr] : instructions_) { + instr->flattened_ins() = FlattenInstrOperands(instr, instr->ins()); + instr->flattened_outs() = FlattenInstrOperands(instr, instr->outs()); + } + + //-------------------------------------------------------------------------- + // We can specify the relationship between functional units and subunits + // in either a bottom up or tops down approach. In the tops down approach + // each functional unit specifies which subunits it implements. In a + // bottoms up approach, each subunit specifies which functional units it + // is associated with. The bottoms up approach is more similar to what + // tablegen uses, and enables subunits to provide details about each + // functional unit they use, and when, and for how long. + //-------------------------------------------------------------------------- + // #define USE_TOPS_DOWN_ARCH_SPEC // Define this to use tops down approach + + // Create a latency for each InstRW object, and add them to a global table. + // For each generated latency, create a subunit which is added to the + // instruction and each functional unit associated with the InstRW. + for (auto &[name, instr] : instructions_) { + for (const auto &[sched_model, inst_rw] : instr->inst_rws()) { + auto lats = FormatReferences(instr, sched_model, inst_rw); + + // If specified on the command line, generate subunit bases to tie + // subunit templates to sets of instructions. This is an alternate + // way to tie subunits to instructions (rather than the typical approach + // of tying instructions to subunits in the instruction definition). + std::string subunit_base; + if (gen_subunit_bases()) { + // Format a list of instructions (or regular expressions) that describe + // which instructions use this subunit. + std::string bases; + if (inst_rw->instregex().empty()) + for (auto inst : inst_rw->instrs()) + bases += formatv(" : \"{0}\"", inst); + else + for (auto regex : inst_rw->instregex()) + bases += formatv(" : \"{0}\"", regex); + subunit_base = formatv(": base{0}", add_subunit_base(bases)); + } + +#ifdef USE_EXPLICIT_LATENCY_TEMPLATES + // Create the body of the subunit template. + int lat_id = add_latency(lats); + std::string subunit = + formatv("{1}() { latency lat{0}(); }", lat_id, subunit_base); +#else + auto subunit = formatv("{1}() {{{{{0} }}", lats, subunit_base); +#endif + + // Add the subunit to the global subunit table. + // Add the subunit (id) to the instruction. + int subunit_id = add_subunit(subunit); + instr->add_subunit(subunit_id); + sched_model_subunits_[subunit_id] = sched_model; + } + } +} + +// Add scheduling information from ItinRW records to each instruction. +void MachineDescription::ProcessItinRW() { + // Create a map of ItinRW records indexed by matched itinerary class names. + std::map> itin_map; + for (auto *itin_rw : itinrw_info()) + for (auto &itin_class : itin_rw->itin_classes()) + itin_map[itin_class].push_back(itin_rw); + + // Add ItinRWs' rw_unit sets to each instruction that references an itinerary. + for (const auto &[name, instr] : instructions_) + if (!instr->itinerary().empty() && itin_map.count(instr->itinerary())) { + for (auto *itin_rw : itin_map[instr->itinerary()]) + GenerateInstRWs(instr, itin_rw->rw_units(), "", itin_rw->sched_model()); + } +} + +// Check each instruction to see if it has any scheduling information +// associated with it. Print a warning if none is found. 
+void MachineDescription::CheckSchedulingInfo() {
+  // If instructions don't have any scheduling info, issue a warning.
+  if (!no_warnings())
+    for (const auto &[name, instr] : instructions_)
+      if (instr->inst_rws().empty() && instr->rw_units().empty() &&
+          instr->itinerary().empty() && !instr->generic_)
+        std::cerr << "Warning: No scheduling info for instr : " << name << "\n";
+}
+
+//------------------------------------------------------------------------
+// If a resource expression is a disjunction that contains at least one
+// conjunction, reassociate the operands to collect the conjunctions and
+// disjunctions.
+// Example: A | (B & C) | D  -->  (A | D) | (B & C)
+//------------------------------------------------------------------------
+void ReassociateDisjunctions(std::vector<ResExpr> &exprs) {
+  auto &expr = exprs.front();
+  auto &opnds = expr.opnds();
+  if (!expr.isOr() || opnds.end() == std::find_if(opnds.begin(), opnds.end(),
+                                                  [](const ResExpr &item) {
+                                                    return item.isAnd();
+                                                  }))
+    return;
+
+  ResExpr original = exprs.front();
+  ResExpr disjunction(ResOp::kOr);
+  exprs.clear();
+  for (auto &item : original.opnds())
+    if (item.isAnd())
+      exprs.push_back(item);
+    else
+      disjunction.opnds().push_back(item);
+  if (!disjunction.opnds().empty())
+    exprs.push_back(disjunction);
+}
+
+//------------------------------------------------------------------------
+// We prefer disjunctions over conjunctions, so if we have a conjunction
+// of a disjunction, convert it to a disjunction of conjunctions.
+//------------------------------------------------------------------------
+void DistributeConjunctions(ResExpr &expr) {
+  auto &opnds = expr.opnds();
+  // Quit if this isn't the kind of expression we're looking for.
+  if (!expr.isAnd() ||
+      std::find_if(opnds.begin(), opnds.end(), [](const ResExpr &item) {
+        return item.isOr();
+      }) == opnds.end())
+    return;
+
+  // Initialize the "answer" to the first expression, or set of expressions.
+  std::vector<ResExpr> answer;
+  if (opnds[0].op() == ResOp::kRes)
+    answer.push_back(opnds[0]);
+  else
+    answer = opnds[0].opnds();
+
+  // Perform the distribution over the rest of the operands of the AND.
+  for (unsigned idx = 1; idx < opnds.size(); idx++) {
+    // If the operand is a single resource, do the distribution in place, since
+    // the result is the same size as the original.
+    if (opnds[idx].op() == ResOp::kRes) {
+      for (auto &item : answer)
+        if (expr.opnds()[idx] != item)
+          item = ResExpr(ResOp::kAnd, item, expr.opnds()[idx]);
+      continue;
+    }
+    // Handle OR operators.  Since the result can grow exponentially, we
+    // operate into a clean vector.
+    auto temp = answer; // save off old vector.
+    answer.clear();
+    for (auto &lhs : opnds[idx].opnds())
+      for (auto &rhs : temp)
+        if (lhs != rhs) {
+          ResExpr And(ResOp::kAnd, lhs, rhs);
+          // We're creating an OR of ANDs, so there's no reason to add
+          // duplicate results into the answer.
+          if (std::find(answer.begin(), answer.end(), And) == answer.end())
+            answer.push_back(And);
+        }
+  }
+  // Finally, create the OR operator over the distributed expressions.
+  expr = ResExpr(ResOp::kOr, answer);
+}
+
+//------------------------------------------------------------------------
+// Determine if a "FuncUnit" name is actually an issue slot.
+// Tablegen doesn't differentiate between functional unit names and issue
+// slot names, but the MDL has a nice way of supporting issue slots.  So
+// this is a target-specific hack that identifies which functional unit
+// names are actually issue slot names.
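+// For example, Hexagon functional unit names containing "SLOT" are treated
+// as issue slots, as are R600's ALU_W/X/Y/Z and TRANS units.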
+//------------------------------------------------------------------------
+bool MachineDescription::IsIssueSlot(const std::string &name) const {
+  if (family() == "Hexagon")
+    return name.find("SLOT") != std::string::npos;
+  if (family() == "R600") {
+    if (name == "ALU_W" || name == "ALU_X" || name == "ALU_Y" ||
+        name == "ALU_Z" || name == "TRANS")
+      return true;
+  }
+  return false;
+}
+
+//------------------------------------------------------------------------
+// Preprocess InstrStage objects to build resource expressions for each
+// stage.  Rewrite ComboUnits as conjunction expressions.  The resulting
+// expression is either a resource, a conjunction of resources, or a
+// disjunction of resources and/or conjunctions:
+//     expr : <resource> | <conj> | <disj> ;
+//     conj : <resource> ('&' <resource>)* ;
+//     disj : <resource|conj> ('|' <resource|conj>)+ ;
+// This reflects the underlying capability of a single itinerary stage.
+//------------------------------------------------------------------------
+void MachineDescription::PreprocessItinStages() {
+  // For each stage definition, build a resource expression and stash it in
+  // the stage.  Expand ComboUnits to conjunctions of their constituent parts.
+  // Also note for each stage whether it's an issue stage.
+  for (const auto &[name, stage] : instr_stages_) {
+    std::vector<ResExpr> operands;
+    stage->set_issue_flag();
+    for (auto &unit : stage->func_units()) {
+      if (!IsComboUnit(unit))
+        operands.emplace_back(unit);
+      else
+        operands.emplace_back(ResOp::kAnd, combo_units_[unit]);
+      if (!IsIssueSlot(unit))
+        stage->clear_issue_flag();
+    }
+    // If there's more than one item in the list, create a disjunction.
+    // Otherwise, just annotate the stage with the single expression.
+    if (operands.size() > 1)
+      stage->set_resource_expr(ResExpr(ResOp::kOr, operands));
+    else
+      stage->set_resource_expr(operands[0]);
+  }
+}
+
+//------------------------------------------------------------------------
+// Scan itinerary data objects and collect issue slots and template
+// arguments.
+//------------------------------------------------------------------------
+void MachineDescription::PreprocessInstrItineraryData() {
+  PreprocessItinStages();
+
+  for (auto [name, itin_data] : instr_itinerary_data_) {
+    // Scan stages for issue stages, and accumulate the expressions into one.
+    ResExpr issue;
+    for (const auto &stage_name : itin_data->stages()) {
+      auto *stage = instr_stages(stage_name);
+      if (stage->is_issue_stage()) {
+        if (issue.isNop())
+          issue = stage->resource_expr();
+        else
+          issue = ResExpr(ResOp::kAnd, issue, stage->resource_expr());
+      }
+    }
+
+    // MDL doesn't support conjunctions of disjunctions for issue stages,
+    // so redistribute them, then split the disjunctions into separate
+    // expressions.
+    if (!issue.isNop()) {
+      DistributeConjunctions(issue);
+      itin_data->set_issue_stage(issue);
+      ReassociateDisjunctions(itin_data->issue_stages());
+    }
+
+    // Scan non-issue stages, and parse resource expressions into template
+    // arguments.  Stages with the same pipeline stage and cycle are
+    // conjunctions.
+    std::map<std::pair<int, int>, ResExpr> exprset;
+    int phase = 0;
+    for (auto &stage_name : itin_data->stages()) {
+      auto *stage = instr_stages(stage_name);
+      if (!stage->is_issue_stage()) {
+        auto index = std::make_pair(phase, stage->cycles());
+        if (exprset.count(index) == 0)
+          exprset[index] = stage->resource_expr();
+        else
+          exprset[index] =
+              ResExpr(ResOp::kAnd, exprset[index], stage->resource_expr());
+      }
+      phase += stage->cycle_increment();
+    }
+
+    // Create a template argument for each expression.
For conjunctions, + // split out each disjunction subexpression as a separate argument. + std::vector args; + for (auto &[index, expr] : exprset) { + if (!expr.isAnd()) { + args.emplace_back(expr, index.first, index.second); + continue; + } + std::vector and_operands; + std::vector or_operands; + for (auto &operand : expr.opnds()) { + if (operand.isOr()) + or_operands.push_back(operand); + else + and_operands.push_back(operand); + } + + if (and_operands.size() == 1) { + args.emplace_back(and_operands[0], index.first, index.second); + } else if (and_operands.size() != 0) { + args.emplace_back(ResExpr(ResOp::kAnd, and_operands), index.first, + index.second); + } + + for (auto Or : or_operands) + args.emplace_back(Or, index.first, index.second); + } + + // Expand disjunctions into a set of expressions. + for (auto &arg : args) + ReassociateDisjunctions(arg.exprs()); + itin_data->set_resource_args(args); + } +} + +// Add latencies and subunits for each instruction with an itinerary. +void MachineDescription::ProcessItineraries() { + PreprocessInstrItineraryData(); + + // Annotate InstrItinData objects with their associated models. + for (const auto &[name, model] : sched_models()) { + if (model->has_itineraries()) + if (auto *itins = processor_itineraries()[model->itineraries()]) + for (auto &itin_name : itins->iid()) + instr_itinerary_data_[itin_name]->add_model(name); + } + + // For each instruction with an itinerary, create a combined subunit/latency + // record, and create a functional unit which uses that subunit, then add + // the functional unit to any processor that supports the instruction. Note + // that instructions may have inst_rws AND itineraries. + for (const auto &[name, inst] : instructions_) { + if (!inst->itinerary().empty()) { + for (auto *itin : instr_itinerary_class(inst->itinerary())) { + auto subunit = FormatItinSubunit(inst, itin); + int subunit_id = add_subunit(subunit); + auto *fu = add_itinerary_fu(itin); + inst->add_subunit(subunit_id); + fu->add_subunit(subunit_id, itin); + + for (const auto &[name, model] : sched_models()) { + if (const auto *itins = processor_itineraries()[model->itineraries()]) + if (itins->iid().count(itin->name())) + model->add_itinerary_func_unit(fu); + } + } + } + } +} + +// Create a new functional unit template based on an itinerary. +// TODO(tbd): Use IID Class name? +ItineraryFuncUnit * +MachineDescription::add_itinerary_fu(InstrItineraryData *itin) { + auto &stages = itin->stages(); + if (itinerary_fus_.count(stages)) + return itinerary_fus_[stages]; + auto name = formatv("FU{0}", itinerary_fus_.size()); + auto *new_fu = new ItineraryFuncUnit(name, itin); + itinerary_fus_.insert({stages, new_fu}); + return new_fu; +} + +} // namespace scan +} // namespace mdl +} // namespace mpact diff --git a/llvm/utils/TdScan/debug.cpp b/llvm/utils/TdScan/debug.cpp new file mode 100644 --- /dev/null +++ b/llvm/utils/TdScan/debug.cpp @@ -0,0 +1,921 @@ +//===- debug.cpp - Dump debug information for all tablegen records --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Write out all the internal data objects that contain TableGen information +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "scan.h" + +namespace mpact { +namespace mdl { +namespace scan { + +//--------------------------------------------------------------------------- +// Write out all the things we've scraped from the TableGen file. +//--------------------------------------------------------------------------- +void MachineDescription::DumpTableGenInfo() { + DumpStats(); + DumpSchedMachineModel(); + DumpProcessorModel(); + DumpFUInfo(); + DumpSchedReadWrite(true); + DumpInstRW(); + DumpItinRW(); + DumpForwardingInfo(); + DumpSchedVariant(); + DumpSchedVar(); + DumpPredicates(); + DumpSchedAlias(); + DumpWriteSequence(); + DumpProcessorItineraries(); + DumpInstrStage(); + DumpInstrItineraryData(); + DumpBypasses(); + DumpInstrItineraryClasses(); + DumpInstructions(); +} + +void MachineDescription::DumpForwardingGraph() { + std::cout << "\nForwarding Network:\n"; + for (const auto &[tuple, latency] : forwarding_network_.graph()) { + const auto &[model, def, use] = tuple; + std::cout << formatv(" {0}: {1}-->{2} = <", model, def, use); + for (auto lat : latency) + std::cout << lat << ","; + std::cout << ">\n"; + } +} + +void MachineDescription::DumpInstrForwardingInfo() { + for (const auto &[name, instr] : instructions_) { + std::string out; + for (auto &[sched_model, inst_rw] : instr->inst_rws()) + out += inst_rw->FwdString(this); + if (out.empty()) + continue; + out += " Functional units: "; + for (auto &[sched_model, inst_rw] : instr->inst_rws()) + for (const auto &unit : inst_rw->func_units()) + out += unit + ","; + out += "\n"; + + std::cout << instr->Format(false) + out + "\n"; + } +} + +//--------------------------------------------------------------------------- +// Write out some statistics about the tablegen input. +//--------------------------------------------------------------------------- +void MachineDescription::DumpStats() { + std::cerr << formatv("\nInstructions: {0}\n" + "Schedule Models: {1}\n" + "Operands: {2}\n" + "CPUs: {3}\n" + "Processor Resources: {4}\n" + "InstRW definitions: {5}\n" + "ReadWrite units: {6}\n" + "Subunits: {7}\n" + "Latencies: {8}\n", + instructions_.size(), sched_models_.size(), + operands_.size(), cpus_.size(), proc_resources_.size(), + instrw_info_.size(), rw_units_.size(), subunits_.size(), + latencies_.size()); +} + +//--------------------------------------------------------------------------- +// Format a name indicating what kind of name it is. 
+//--------------------------------------------------------------------------- +std::string MachineDescription::FormatName(const std::string &name) const { + std::string out = name; + + if (IsInstruction(name)) + out += ".I"; + if (IsOperand(name)) + out += ".O"; + if (IsProcessorModel(name)) + out += ".C"; + if (IsSchedModel(name)) + out += ".SM"; + if (IsProcResource(name)) + out += ".FU"; + if (IsSchedReadWrite(name)) + out += ".RW"; + if (IsSchedVariant(name)) + out += ".SVS"; + if (IsSchedVar(name)) + out += ".sv"; + if (IsMCSchedPredicate(name)) + out += ".MSP"; + if (IsSchedPredicate(name)) + out += ".SP"; + if (IsMCInstPredicate(name)) + out += ".MIP"; + if (IsSchedAliasMatch(name)) + out += ".AM"; + if (IsWriteSequence(name)) + out += ".WS"; + if (IsProcessorItinerary(name)) + out += ".PI"; + if (IsInstrItineraryClass(name)) + out += ".IC"; + if (IsInstrItineraryData(name)) + out += ".ID"; + if (IsInstrStage(name)) + out += ".IS"; + if (IsBypass(name)) + out += ".BP"; + return out; +} + +std::string MachineDescription::DumpSchedRW(const std::string &name, + const std::string &model, + std::string prefix) { + if (IsSchedReadWrite(name)) + return prefix + rw_units_[name]->ToString(this, model, true); + if (IsWriteSequence(name)) + return prefix + write_sequences_[name]->ToString(this); + if (IsSchedVariant(name)) + return prefix + sched_variants_[name]->ToString(this, prefix + " "); + if (IsSchedAliasMatch(name) && sched_alias_matches_[name].count(model)) + return prefix + + DumpSchedRW(sched_alias_matches_[name][model], model, prefix); + return prefix + "Error(" + name + ")"; +} + +std::string Instruction::ToString(MachineDescription *md) { + std::string out = Format(false); // just format the instruction and opnds. + if (!flattened_outs().empty()) { + out += " Flattened outs: "; + for (const auto &opnd : flattened_outs()) + out += opnd + ", "; + out += "\n"; + } + if (!flattened_ins().empty()) { + out += " Flattened ins: "; + for (const auto &opnd : flattened_ins()) + out += opnd + ", "; + out += "\n"; + } + + if (!inst_rws().empty()) { + out += " RWUnits=["; + for (auto &rw_unit : rw_units()) + out += formatv("{0},", md->FormatName(rw_unit)); + out += "]\n"; + } + + for (auto &[sched_model, inst_rw] : inst_rws()) { + out += formatv(" InstRW {0}: SchedModel={1} RWUnits=[", inst_rw->name(), + sched_model); + for (auto &rw_unit : inst_rw->rw_units()) + out += formatv("{0}, ", md->FormatName(rw_unit)); + out += "]\n"; + for (auto &rw_unit : inst_rw->rw_units()) + out += md->DumpSchedRW(rw_unit, sched_model, " "); + + out += inst_rw->FwdString(md); + + auto refs = md->FormatReferences(this, sched_model, inst_rw); + int index = md->add_latency(refs); + out += formatv(" {0} (index={1})\n", refs, index); + } + + // Gather the register operands, and find the last explicit operand ref. + auto opnds = md->GetRegisterOperands(this); + int last_opnd_idx = 0; + for (auto &[type, name, op_id] : opnds) + if (op_id > last_opnd_idx) + last_opnd_idx = op_id; + + int last_operand_cycles = 0; // Consistency check operand_cycles size. 
+ + if (!itinerary_.empty()) { + for (auto *itin : md->instr_itinerary_class(itinerary_)) { + std::string separator_and; + out += + formatv(" Itin name:{0}, Itin class:{1} :\n FUS:", itin->name(), + itinerary_); + for (const auto &stage : itin->stages()) { + auto *instr_stage = md->instr_stages(stage); + out += formatv("{0}(", separator_and); + std::string separator_or; + for (const auto &unit : instr_stage->func_units()) { + out += formatv("{0}{1}", separator_or, unit); + separator_or = "||"; + } + out += ")"; + separator_and = " && "; + } + out += "\n CPUS: "; + for (const auto &[name, model] : md->sched_models()) { + if (auto *itins = md->processor_itineraries()[model->itineraries()]) + if (itins->iid().count(itin->name())) + out += formatv("{0},", model->name()); + } + out += "\n Operand cycles = ["; + for (const auto &operand_cycle : itin->operand_cycles()) + out += formatv("{0}, ", operand_cycle); + out += "]"; + int cycle_size = itin->operand_cycles().size(); + if (cycle_size) { + if (last_operand_cycles != 0 && last_operand_cycles != cycle_size) + out += " Inconsistent number of operand cycles."; + if (last_opnd_idx >= cycle_size) + out += " Too few operand cycles."; + last_operand_cycles = itin->operand_cycles().size(); + } + out += "\n"; + } + } + + return out + "}\n"; +} + +//--------------------------------------------------------------------------- +// Format SchedMachineModel information +//--------------------------------------------------------------------------- +std::string SchedMachineModel::ToString(const MachineDescription *md) const { + auto out = + formatv("SchedMachineModel {0}, issue_width={1}, load_latency={2}, " + "micro_op_buffer_size={3}, misprediction_penalty={4}, " + "itineraries={5}\n CPUS: ", + name_, issue_width_, load_latency_, micro_op_buffer_size_, + mispredict_penalty_, itineraries_); + for (auto &cpu : cpus_) + out += formatv("{0}, ", cpu); + + out += "\n FUS: "; + for (const auto &fu : func_units_) + out += formatv("{0}, ", fu); + return out + "\n\n"; +} + +//---------------------------------------------------------------------------- +// Format InstrStage records. +//---------------------------------------------------------------------------- +std::string InstrStage::ToString(const MachineDescription *md) const { + std::string out = + formatv(" InstrStage {0}: cycles={1}, func_units=[", name_, cycles_); + for (const auto &func_unit : func_units_) + out += formatv("{0}, ", func_unit); + out += formatv("], timeinc={0}", timeinc_); + + if (!resource_expr_.isNop()) + out += ", " + resource_expr_.ToString(); + return out + "\n"; +} + +//--------------------------------------------------------------------------- +// Format Processor model information. +//--------------------------------------------------------------------------- +std::string ProcessorModel::ToString(MachineDescription *md) const { + return formatv("Cpu {0}, SchedModel={1}\n", name_, sched_model_); +} + +//---------------------------------------------------------------------------- +// Format SchedVar records. 
+//---------------------------------------------------------------------------- +std::string SchedVar::ToString(MachineDescription *md, + std::string prefix) const { + std::string out = + formatv("{2}SchedVar {0}:\n" + "{2} Predicate={1},\n" + "{2} Selected=[", + md->FormatName(name_), md->FormatName(predicate_), prefix); + + for (const auto &rw_unit : selected_) + out += formatv("{0},", md->FormatName(rw_unit)); + out += "]\n"; + for (const auto &rw_unit : selected_) + out += md->DumpSchedRW(rw_unit, "?", prefix + " "); + + return out + "\n"; +} + +//---------------------------------------------------------------------------- +// Format SchedVar records. +//---------------------------------------------------------------------------- +std::string SchedVariant::ToString(MachineDescription *md, + std::string prefix) const { + std::string out = is_write_ ? "--->VariantWrite" : "--->VariantRead "; + out += formatv("={0}, schedModel={1}, ", md->FormatName(name_), sched_model_); + + if (is_variadic()) + out += "Variadic "; + out += "Variants[="; + for (const auto &variant : variants_) + out += formatv("{0},", md->FormatName(variant)); + out += "]\n"; + for (const auto &variant : variants_) + out += md->sched_vars()[variant]->ToString(md, prefix + " "); + + return out; +} + +//---------------------------------------------------------------------------- +// Format Function Unit information. +//---------------------------------------------------------------------------- +std::string ProcResource::ToString(MachineDescription *md) const { + auto out = formatv("Funcunit {0}<{1}>, {2} super={3} is resource grp={4} " + "Buffer Size={5}", + name_, num_instances_, sched_model_, fu_super_, + is_proc_res_group_, buffer_size_); + if (!group_names_.empty()) { + out += ", Resources=["; + for (const auto &fu : group_names_) + out += formatv("{0} ", fu); + out += "]"; + } + return out + "\n"; +} + +//---------------------------------------------------------------------------- +// Format SchedReadWrite records. +//---------------------------------------------------------------------------- +std::string SchedReadWrite::ToString(MachineDescription *md, + const std::string &model, + bool all_info) const { + if (all_info) { + std::string out = is_write_ ? "Write-Unit " : "Read-Unit "; + out += formatv("{0}, \n", name_); + + std::string unit; + for (auto &[sched_model, sched_model_info] : sched_model_info_) { + if (!model.empty() && model.find(sched_model) == std::string::npos) + continue; + unit += formatv(" (SchedModel={0} Latency={1} MOps={2}", sched_model, + sched_model_info.latency, sched_model_info.micro_ops); + + if (sched_model_info.is_begin_group) + unit += " BeginGroup"; + if (sched_model_info.is_end_group) + unit += " EndGroup"; + if (sched_model_info.is_single_issue) + unit += " SingleIssue"; + if (sched_model_info.retire_ooo) + unit += " RetireOOO"; + unit += is_write_ ? " FUs=[" : ", ValidWrites=["; + for (auto &fu : sched_model_info.names) + unit += formatv("{0},", md->FormatName(fu)); + if (is_write_) { + unit += "] Resource Cycles=["; + for (auto cycle : sched_model_info.res_cycles) + unit += formatv("{0},", cycle); + unit += "] StartAt Cycles=["; + for (auto cycle : sched_model_info.start_cycles) + unit += formatv("{0},", cycle); + } + unit += "]),\n"; + } + if (unit.empty()) + return out + "\n"; + return out + unit; + } + + if (!is_write_) + return ""; + + // Debugging stuff. 
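+  // The compact per-model lines below flag suspicious records: "(n)" is the
+  // size of a resource group, "<<fu>>" marks a super-unit relationship,
+  // "[[n]]" is an instance count, and the trailing "F..R", "F..M", and "R..M"
+  // comments mark mismatches between the functional-unit count, the resource
+  // cycles, and the micro-op count.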
+ std::string out; + for (auto &[model, info] : sched_model_info_) { + auto &func_units = info.names; + auto &res_cycles = info.res_cycles; + auto &start_cycles = info.start_cycles; + int micro_ops = info.micro_ops; + if (func_units.size() == 0 || micro_ops == (int)func_units.size()) + continue; + + out += formatv("{0}: {1,-20} \t[ ", md->family(), name()); + for (auto &unit : func_units) { + out += formatv("{0}", unit); + if (md->IsProcResourceGroup(unit)) + out += + formatv("({0})", md->proc_resources()[unit]->group_names().size()); + auto *funit = md->proc_resources()[unit]; + if (funit->fu_super() != "") + out += formatv("<<{0}>>", funit->fu_super()); + if (!funit->child_func_units().empty()) // it IS a superunit + out += ("<<>>"); + if (funit->num_instances() != 1) + out += formatv("[[{0}]]", funit->num_instances()); + out += " "; + } + out += "] \t[ "; + for (auto cycles : res_cycles) + out += formatv("{0} ", cycles); + if (!start_cycles.empty()) { + out += "] \t@[ "; + for (auto cycles : start_cycles) + out += formatv("{0} ", cycles); + } + out += formatv("] \t{0}", micro_ops); + if (info.is_begin_group) + out += " BeginGroup"; + if (info.is_end_group) + out += " EndGroup"; + if (info.is_single_issue) + out += " SingleIssue"; + if (info.retire_ooo) + out += " RetireOOO"; + + std::string com; + if (!res_cycles.empty() && res_cycles.size() != func_units.size()) + com += formatv(" F{0}{2}R{1}", func_units.size(), res_cycles.size(), + func_units.size() < res_cycles.size() ? "<" : ">"); + if (res_cycles.empty() && micro_ops != 0 && + func_units.size() != (unsigned)micro_ops) + com += formatv(" F{0}{2}M{1}", func_units.size(), micro_ops, + (int)func_units.size() < micro_ops ? "<" : ">"); + if (!res_cycles.empty() && micro_ops != 0) { + int sum_cycles = 0; + for (auto &cycles : res_cycles) + sum_cycles += cycles; + if (sum_cycles != micro_ops) + com += formatv(" R{0}{2}M{1}", sum_cycles, micro_ops, + sum_cycles < micro_ops ? "<" : ">"); + } + if (!com.empty()) + out += "\t ///" + com; + out += "\n"; + } + + return out; +} + +//---------------------------------------------------------------------------- +// Format SchedAlias records. +//---------------------------------------------------------------------------- +std::string SchedAlias::ToString(MachineDescription *md) const { + return formatv("SchedAlias {0}: SchedModel={1}, Match RW={2} Alias RW={3}\n", + name_, sched_model_, md->FormatName(match_rw_), + md->FormatName(alias_rw_)); +} + +//---------------------------------------------------------------------------- +// Format SchedPredicate records. +//---------------------------------------------------------------------------- +std::string SchedPredicate::ToString(MachineDescription *md) const { + return formatv("SchedPredicate {0}: SchedModel={1}, predicate={2}\n", name_, + sched_model_, md->FormatName(predicate_)); +} + +//---------------------------------------------------------------------------- +// Format MCSchedPredicate records. +//---------------------------------------------------------------------------- +std::string MCSchedPredicate::ToString(MachineDescription *md) const { + auto out = formatv("MCSchedPredicate {0}", name_); + if (!sched_model_.empty()) + out += formatv(": SchedModel={0}", sched_model_); + return out + formatv(", MCInstPred={0}\n", md->FormatName(inst_predicate_)); +} + +//---------------------------------------------------------------------------- +// Indent formatting of predicates an appropriate amount. 
+//---------------------------------------------------------------------------- +void PredIndent(std::string *out, int indent) { + *out += std::string(indent * 2, ' '); +} + +//---------------------------------------------------------------------------- +// Format MCInstPredicate components. +//---------------------------------------------------------------------------- +std::string MCPredFormat(const std::string &name, MachineDescription *md, + int indent) { + if (name.empty()) + return ""; + std::string out; + PredIndent(&out, indent); + + if (md->mc_inst_predicates().count(name)) + return out + formatv("MCInstPredicate={0}: {1}", name, + md->mc_inst_predicates()[name]->ToString(md, indent)); + else if (md->statements().count(name)) + return out + formatv("MCStatement={0}\n{1}", name, + md->statements()[name]->ToString(md, indent)); + else if (md->switch_cases().count(name)) + return out + formatv("MCOpcodeSwitchCases={0}\n{1}", name, + md->switch_cases()[name]->ToString(md, indent)); + return out + formatv("Unknown Object: {0}\n", name); +} + +//---------------------------------------------------------------------------- +// Format MCInstPredicate records. +//---------------------------------------------------------------------------- +std::string MCInstPredicate::ToString(MachineDescription *md, + int indent) const { + std::string out; + if (indent == 0 && str_contains(name_, "anonymous_")) + return ""; + + if (indent == 0) { + out += formatv("MCInstPredicate={0}: ", name_); + } else if (!str_contains(name_, "anonymous_")) { + out += "{...}\n"; + return out; + } + + if (name_ == kTrue || name_ == kFalse) + return out + "\n"; + + out += "["; + for (auto &attr : attributes_) + out += formatv("{0},", attr); + out += "]"; + + if (!function_name_.empty()) + out += formatv(", FN={0}", function_name_); + if (!function_mapper_.empty()) + out += formatv(", FuncMapper={0}", function_mapper_); + if (!opindex_.empty()) + out += formatv(", opindex={0}", opindex_); + if (!immval_.empty()) + out += formatv(", immval={0}", immval_); + if (!register_name_.empty()) + out += formatv(", Register={0}", register_name_); + if (!valid_opcodes_.empty()) { + out += ", valid_opcodes=["; + for (auto &opcode : valid_opcodes_) + out += formatv("{0},", opcode); + out += "]"; + } + + out += formatv("\n{0}", MCPredFormat(statement_, md, indent + 1)); + + if (!predicates_.empty()) { + for (auto &pred : predicates_) + out += MCPredFormat(pred, md, indent + 1); + } + return out; +} + +//---------------------------------------------------------------------------- +// Format MCStatement records. +//---------------------------------------------------------------------------- +std::string MCStatement::ToString(MachineDescription *md, int indent) const { + std::string out; + if (indent == 0 && str_contains(name_, "anonymous_")) + return ""; + + if (!predicate_.empty()) + out = MCPredFormat(predicate_, md, indent + 1); + + if (!cases_.empty()) { + PredIndent(&out, indent); + out += " Cases=\n"; + for (auto &opcode : cases_) + out += MCPredFormat(opcode, md, indent + 2); + } + + if (!default_.empty()) + out += MCPredFormat(default_, md, indent + 1); + return out; +} + +//---------------------------------------------------------------------------- +// Format MCOpcodeSwitchCase records. 
+//---------------------------------------------------------------------------- +std::string MCOpcodeSwitchCase::ToString(MachineDescription *md, + int indent) const { + std::string out; + if (indent == 0) + return ""; + + PredIndent(&out, indent); + out += " Opcodes=["; + for (auto &opcode : cases_) + out += formatv("{0},", opcode); + out += "]\n"; + + if (!case_stmt_.empty()) { + PredIndent(&out, indent); + out += formatv(" Default=({0})\n{1}", case_stmt_, + MCPredFormat(case_stmt_, md, indent + 2)); + } + return out; +} + +//---------------------------------------------------------------------------- +// Format WriteSequence records. +//---------------------------------------------------------------------------- +std::string WriteSequence::ToString(MachineDescription *md) const { + std::string out = + formatv("WriteSequence Name={0}, SchedModel={1}, Repeat={2}, Writes=[", + name_, sched_model_, repeat_); + + for (const auto &write : writes_) + out += formatv("{0},", md->FormatName(write)); + + return out + "]\n"; +} + +//---------------------------------------------------------------------------- +// Format InstRW records. +//---------------------------------------------------------------------------- +std::string InstRW::ToString(const MachineDescription *md, bool full) const { + std::string out = "InstRW: regex=["; + for (const auto ®ex : instregex_) + out += formatv("\"{0}\",", regex); + out += "], instrs["; + if (full) + for (const auto &instr : instrs_) + out += formatv("{0},", instr); + + out += formatv("], schedModel={0},\n RWUnits=[", sched_model_); + for (const auto &rw_unit : rw_units_) + out += formatv("{0},", md->FormatName(rw_unit)); + return out + "]\n" + FwdString(md) + "\n"; +} + +//------------------------------------------------------------------------- +// Format InstrRW forwarding information. +//------------------------------------------------------------------------- +std::string InstRW::FwdString(const MachineDescription *md) const { + if (forwarding_info_.empty()) + return ""; + + std::string out; + for (auto [res_id, latency, def, use, pred] : forwarding_info_) { + if (pred == "NoSchedPred") + pred = ""; + out += formatv(" Forward {2}-->{3}: res {0}, latency {1} :{4} {5}\n", + res_id, latency, def, use, pred, sched_model_); + } + return out; +} + +//---------------------------------------------------------------------------- +// Format ItinRW records. +//---------------------------------------------------------------------------- +std::string ItinRW::ToString(const MachineDescription *md) const { + std::string out = "ItinRW " + name_ + " : "; + out += formatv("schedModel={0}, RWUnits=[", sched_model_); + for (const auto &rw_unit : rw_units_) + out += formatv("{0},", md->FormatName(rw_unit)); + out += "]\n Itin Classes=["; + for (const auto &unit : itin_classes_) + out += unit + ","; + return out + "]\n"; +} + +//------------------------------------------------------------------------- +// Format Processor Itineraries records. 
+//------------------------------------------------------------------------- +std::string ProcessorItineraries::ToString(const MachineDescription *md) const { + auto out = formatv("ProcessorItineraries name {0}, func_units=[", name_); + for (const auto &resource : resources_) + out += formatv("{0}, ", resource); + out += "], Bypass=["; + for (const auto &bypass : bypass_) + out += formatv("{0}, ", bypass); + + out += "], IID=["; + for (const auto &iid : iid_) + out += formatv("{0}, ", iid); + return out + "]\n\n"; +} + +//---------------------------------------------------------------------------- +// Format Instruction Itinerary Data records. +//---------------------------------------------------------------------------- +std::string InstrItineraryData::ToString(MachineDescription *md) const { + auto out = formatv(" InstrItineraryData {0}: class={1}, micro_ops={2}", + name_, instr_itinerary_class_, num_micro_ops_); + out += ", operand_cycles=["; + for (const auto &operand_cycle : operand_cycles_) + out += formatv("{0}, ", operand_cycle); + out += "], bypasses=["; + for (const auto &bypass : bypasses_) + out += formatv("{0}, ", bypass); + + out += "], stages=["; + for (const auto &stage : stages_) + out += formatv("{0}, ", stage); + out += "]\n"; + for (const auto &stage : stages_) + out += md->instr_stages(stage)->ToString(md); + + if (!issue_stages_.empty()) { + out += " ==> Issue resources: "; + for (auto &expr : issue_stages_) + out += expr.ToString() + " || "; + out = out.substr(0, out.size() - 4); + out += "\n"; + } + if (!resource_args_.empty()) { + for (unsigned arg = 0; arg < resource_args_.size(); arg++) + out += formatv(" ==> Template argument {0}: {1}\n", arg + 1, + resource_args_[arg].ToString()); + out += "\n"; + } + return out; +} + +constexpr char divider[] = + "\n//-------------------------------------------------------------------\n"; +static void DumpHeader(const char *title, int size) { + std::cout << formatv("{0}// {1} ({2} entries){0}", divider, title, size); +} + +//---------------------------------------------------------------------------- +// Dump out all processor variants. +//---------------------------------------------------------------------------- +void MachineDescription::DumpProcessorModel() { + DumpHeader("CPU Description Classes", cpus_.size()); + for (const auto &[name, cpu] : cpus_) + cpu->Dump(this); +} + +//---------------------------------------------------------------------------- +// Dump out all schedule models. +//---------------------------------------------------------------------------- +void MachineDescription::DumpSchedMachineModel() { + DumpHeader("SchedModel Description Classes", sched_models_.size()); + for (const auto &[name, model] : sched_models_) + model->Dump(this); +} + +// ---------------------------------------------------------------------------- +// Dump out all functional units. +//---------------------------------------------------------------------------- +void MachineDescription::DumpFUInfo() { + DumpHeader("Functional Unit Descriptions", proc_resources_.size()); + for (const auto &[name, fu] : proc_resources_) + fu->Dump(this); +} + +// ---------------------------------------------------------------------------- +// Dump out all rwunits. 
+//---------------------------------------------------------------------------- +void MachineDescription::DumpSchedReadWrite(bool all_info) { + DumpHeader("SchedReadWrite Descriptions", rw_units_.size()); + for (const auto &[name, rwunit] : rw_units_) + rwunit->Dump(this, all_info); +} + +// ---------------------------------------------------------------------------- +// Dump out all InstRW records. +//---------------------------------------------------------------------------- +void MachineDescription::DumpInstRW() { + DumpHeader("InstRW Descriptions", instrw_info_.size()); + for (const auto *instrw : instrw_info_) + instrw->Dump(this, true); +} + +// ---------------------------------------------------------------------------- +// Dump InstRW records that have ReadAdvance ReadWrite records. +//---------------------------------------------------------------------------- +void MachineDescription::DumpForwardingInfo() { + int count = 0; + for (auto *instrw : instrw_info_) + if (!instrw->forwarding_info().empty()) + count++; + + DumpHeader("Forwarding Descriptions", count); + for (auto *instrw : instrw_info_) + if (!instrw->forwarding_info().empty()) + instrw->Dump(this, false); +} + +// ---------------------------------------------------------------------------- +// Dump out all ItinRW records. +//---------------------------------------------------------------------------- +void MachineDescription::DumpItinRW() { + DumpHeader("ItinRW Descriptions", itinrw_info_.size()); + for (const auto *itinrw : itinrw_info_) + itinrw->Dump(this); +} + +// ---------------------------------------------------------------------------- +// Dump out all SchedVariant records. +//---------------------------------------------------------------------------- +void MachineDescription::DumpSchedVariant() { + DumpHeader("SchedVariant Descriptions", sched_variants_.size()); + for (const auto &[name, variant] : sched_variants_) + variant->Dump(this); +} + +// ---------------------------------------------------------------------------- +// Dump out all SchedVar records. +//---------------------------------------------------------------------------- +void MachineDescription::DumpSchedVar() { + DumpHeader("SchedVar Descriptions", sched_vars_.size()); + for (const auto &[name, var] : sched_vars_) + var->Dump(this); +} + +// ---------------------------------------------------------------------------- +// Dump out all MCSchedPredicate, MCInstPredicates, SchedPredicates, and +// associated statements. +//---------------------------------------------------------------------------- +void MachineDescription::DumpPredicates() { + DumpHeader("MCSchedPredicate Descriptions", mc_sched_predicates_.size()); + for (const auto &[name, sp] : mc_sched_predicates_) + sp->Dump(this); + + DumpHeader("SchedPredicate Descriptions", sched_predicates_.size()); + for (const auto &[name, sp] : sched_predicates_) + sp->Dump(this); + + DumpHeader("MCInstPredicate Descriptions", mc_inst_predicates_.size()); + for (const auto &[name, fp] : mc_inst_predicates_) + fp->Dump(this); + + DumpHeader("MCStatement Descriptions", statements_.size()); + for (const auto &[name, stmt] : statements_) + stmt->Dump(this); + + DumpHeader("MCOpcodeSwitchCase Descriptions", switch_cases_.size()); + for (const auto &[name, stmt] : switch_cases_) + stmt->Dump(this); +} + +// ---------------------------------------------------------------------------- +// Dump out all SchedAlias records. 
+//---------------------------------------------------------------------------- +void MachineDescription::DumpSchedAlias() { + DumpHeader("SchedAlias Descriptions", sched_alias_.size()); + for (const auto &[name, alias] : sched_alias_) + alias->Dump(this); +} + +// ---------------------------------------------------------------------------- +// Dump out all WriteSequence records. +//---------------------------------------------------------------------------- +void MachineDescription::DumpWriteSequence() { + DumpHeader("WriteSequence Descriptions", write_sequences_.size()); + for (const auto &[name, write_sequence] : write_sequences_) + write_sequence->Dump(this); +} + +// ---------------------------------------------------------------------------- +// Dump out all ProcessorItineraries records. +//---------------------------------------------------------------------------- +void MachineDescription::DumpProcessorItineraries() { + DumpHeader("ProcessorItinerary Descriptions", processor_itineraries_.size()); + for (const auto &[name, processor_itinerary] : processor_itineraries_) + processor_itinerary->Dump(this); +} + +// ---------------------------------------------------------------------------- +// Dump out all InstrStage records. +//---------------------------------------------------------------------------- +void MachineDescription::DumpInstrStage() { + DumpHeader("InstrStage Descriptions", instr_stages_.size()); + for (const auto &[name, instr_stage] : instr_stages_) + instr_stage->Dump(this); +} + +// ---------------------------------------------------------------------------- +// Dump out all InstrItinClasses records. +//---------------------------------------------------------------------------- +void MachineDescription::DumpInstrItineraryClasses() { + DumpHeader("InstrItinClass Descriptions", instr_itinerary_class_.size()); + std::string out; + for (const auto &[name, members] : instr_itinerary_class_) { + std::cout << formatv(" InstrItinClass {0}:\n", name); + for (auto *member : members) + member->Dump(this); + std::cout << "\n"; + } +} + +// ---------------------------------------------------------------------------- +// Dump out all InstrItinData records. +//---------------------------------------------------------------------------- +void MachineDescription::DumpInstrItineraryData() { + DumpHeader("InstrItinData Descriptions", instr_itinerary_data_.size()); + for (const auto &[name, instr_itin_data] : instr_itinerary_data_) + instr_itin_data->Dump(this); +} + +// ---------------------------------------------------------------------------- +// Dump out all Bypasses records. +//---------------------------------------------------------------------------- +void MachineDescription::DumpBypasses() { + DumpHeader("Bypass Descriptions", bypasses_.size()); + for (const auto &name : bypasses_) + std::cout << name << ", "; + std::cout << "\n"; +} + +// ---------------------------------------------------------------------------- +// Dump out information about all instruction definitions. 
+//----------------------------------------------------------------------------
+void MachineDescription::DumpInstructions() {
+  DumpHeader("Instruction Descriptions", instructions_.size());
+  for (const auto &[name, instr] : instructions_)
+    instr->Dump(this);
+}
+
+} // namespace scan
+} // namespace mdl
+} // namespace mpact
diff --git a/llvm/utils/TdScan/output.cpp b/llvm/utils/TdScan/output.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/utils/TdScan/output.cpp
@@ -0,0 +1,1447 @@
+//===- output.cpp - Write out the generated machine description -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file generates the MDL output file. The functions are arranged
+// in the order we normally expect to see things:
+//     Pipeline definitions;
+//     CPU definitions;
+//     Functional unit template definitions;
+//     Subunit/latency template definitions;
+//     Latency template definitions;
+//     Register definitions;
+//     Register class definitions;
+//     Operand definitions;
+//     Instruction definitions;
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <cctype>
+#include <cstdlib>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <set>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "scan.h"
+
+namespace mpact {
+namespace mdl {
+namespace scan {
+
+using StringSet = std::set<std::string>;
+
+//----------------------------------------------------------------------------
+// Write out the entire MDL file.
+//----------------------------------------------------------------------------
+void MachineDescription::WriteMDL(std::string &input_file, bool gen_arch_spec,
+                                  std::string &output_dir) {
+  OpenOutputFiles(input_file, gen_arch_spec, output_dir);
+  if (gen_arch_spec) {
+    WriteFileHeaders();
+    WritePipelinePhase();
+    WriteProcessorModel();
+    WriteFUInfo();
+    WriteSubunits();
+    WriteLatencies();
+    WritePredicates();
+  }
+
+  WriteRegisterSets();
+  WriteRegisterClasses();
+  WriteOperandsMdl();
+  WriteInstructionsMdl();
+
+  output_inst_->close();
+  if (gen_arch_spec)
+    output_arch_->close();
+}
+
+//----------------------------------------------------------------------------
+// Open the output files, check for errors.
+//----------------------------------------------------------------------------
+void MachineDescription::OpenOutputFiles(std::string &input_file,
+                                         bool gen_arch_spec,
+                                         std::string &output_dir) {
+  if (!output_dir.empty())
+    output_dir += "/";
+
+  if (output_name_.empty()) {
+    auto infile = std::filesystem::path(input_file);
+    output_name_ = infile.stem();
+  }
+
+#ifdef BUILDING_FOR_GOOGLE3
+  // Converting architecture name 'family_' to lowercase to use it as a prefix
+  // for the generated file name. This is consistent with the naming convention
+  // used in other generated files (using other related tools like the mdl
+  // compiler). Moreover, mdl-specific bzl-build rules use lowercase
+  // architecture names to specify generated file names as well as rule names,
+  // and by convention, rule names need to be in lowercase.
+ std::transform output_name_.begin(), output_name_.end(), output_name_.begin(), + [](unsigned char c) { + return std::tolower(c); }); +#endif + + arch_file_name_ = output_dir + output_name_ + ".mdl"; + inst_file_name_ = output_dir + output_name_ + "_instructions.mdl"; + + if (gen_arch_spec) { + output_arch_ = new std::fstream(arch_file_name_, std::fstream::out); + if (!output_arch_->is_open()) { + std::cerr << formatv("Cannot open output file \"{0}\", aborting\n", + arch_file_name_); + exit(EXIT_FAILURE); + } + } + + output_inst_ = new std::fstream(inst_file_name_, std::fstream::out); + if (!output_inst_->is_open()) { + std::cerr << formatv("Cannot open output file \"{0}\", aborting\n", + inst_file_name_); + exit(EXIT_FAILURE); + } +} + +//---------------------------------------------------------------------------- +// Given a list of operands, return a vector of register operand reference +// descriptors. +//---------------------------------------------------------------------------- +void MachineDescription::GetRegisterOperands(std::string prefix, + std::string ref_type, int opnd_id, + StringVec &opnds, + OperandRefs &result) { + std::string separator = prefix.empty() ? "$" : "."; + + for (auto &op : opnds) { + auto name = Instruction::opnd_name(op); + auto type = Instruction::opnd_type(op); + if (IsRegister(name)) + result.emplace_back(ref_type, "$" + name, opnd_id); + if (IsRegisterClass(type)) { + if (opnds.size() == 1 && !prefix.empty()) + result.emplace_back(ref_type, prefix, opnd_id); + else + result.emplace_back(ref_type, prefix + separator + name, opnd_id); + } + if (IsOperand(type)) { + GetRegisterOperands(prefix + separator + name, ref_type, opnd_id, + operands_[type]->ops(), result); + } + } +} + +//---------------------------------------------------------------------------- +// Given an instruction, return a list of register operand descriptors to be +// used in latency references. Each item contains the qualified operand +// name, whether it's a def or use, and its original operand id. Implicit +// operands have negative operand ids to differentiate them from explicit +// operands. +//---------------------------------------------------------------------------- +OperandRefs MachineDescription::GetRegisterOperands(Instruction *instr) { + OperandRefs opnds; + GetRegisterOperands("", "def", 0, instr->outs(), opnds); + GetRegisterOperands("", "use", instr->outs().size(), instr->ins(), opnds); + int implicit_opnd_idx = instr->outs().size() + instr->ins().size(); + for (auto &implicit_def : instr->implicit_defs()) + opnds.emplace_back("def", "$" + implicit_def, -implicit_opnd_idx++); + for (auto &implicit_use : instr->implicit_uses()) + opnds.emplace_back("use", "$" + implicit_use, -implicit_opnd_idx++); + return opnds; +} + +// Determine if a single unit is a write (depending on its type). +bool MachineDescription::IsWrite(const std::string &name, + const std::string &model) const { + auto unit = GetSchedRef(name, model); + if (IsSchedReadWrite(unit)) + return rw_units_.at(unit)->is_write(); + if (IsSchedVariant(unit)) + return sched_variants_.at(unit)->is_write(); + return true; +} + +// Determine if a single unit is a variadic access. 
+bool MachineDescription::IsVariadic(const std::string &name, + const std::string &model) const { + auto unit = GetSchedRef(name, model); + if (IsSchedVariant(unit)) + return sched_variants_.at(unit)->is_variadic(); + return false; +} + +//---------------------------------------------------------------------------- +// Given a SchedReadWrite name and SchedModel name, lookup the name. If it +// is an alias, recursively look up the aliased name. +//---------------------------------------------------------------------------- +SchedReadWrite * +MachineDescription::GetSchedReadWrite(const std::string &name, + const std::string &model) const { + if (IsSchedAliasMatch(name) && sched_alias_matches_.at(name).count(model)) + return GetSchedReadWrite(sched_alias_matches_.at(name).at(model), model); + if (IsSchedReadWrite(name)) + return rw_units_.at(name); + return nullptr; +} + +std::string MachineDescription::GetSchedRef(const std::string &name, + const std::string &model) const { + if (IsSchedAliasMatch(name) && sched_alias_matches_.at(name).count(model)) + return GetSchedRef(sched_alias_matches_.at(name).at(model), model); + return name; +} + +// Format a single reference. Return true if it contained a variant record. +bool MachineDescription::FormatRef(std::string &out, const Instruction *instr, + const std::string &sched_model, + const std::string &unit_name, + const std::string &operand, int repeat, + int &latency, bool &fu_seen, + std::string indent /* = " " */) { + auto name = GetSchedRef(unit_name, sched_model); + if (name == "NoReadAdvance" || name == "ReadDefault") + return false; + + // For SchedReadWrite objects, write out a single def or use. + if (IsSchedReadWrite(name)) { + if (auto *rw_unit = GetSchedReadWrite(name, sched_model)) { + if (auto *ref = rw_unit->HasModel(sched_model)) { + bool is_write = IsWrite(unit_name, sched_model); + + // Adjust the latency. Note that we accumulate all the latencies + // from a write sequence. Read latencies start at one, and are + // adjusted by the Advance amount, which can be negative. + // Cycle 1 reads don't have to be explicitly written out. + if (is_write) { + latency += std::max(ref->latency, 1); + } else { + latency = ref->latency + 1; + if (latency == 1 && repeat < 2) + return false; + } + + // Generate the reference. If it's a repeated reference, write out + // the repeat count. Note that use latencies can be 0 or negative for + // ReadAdvance entries, so these need special formatting. + if (!operand.empty()) { + std::string count = (repeat > 1) ? formatv("[{0}]", repeat) : ""; + std::string op = is_write ? "def" : "use"; + if (latency < 1) + out += formatv(" {0}(E1-{1}, ${2});", op, -latency + 1, operand); + else + out += formatv(" {0}(E{1}{2}, ${3});", op, latency, count, operand); + } + + // Write out functional unit, micro-ops, and flags information. These + // are of the following general form: + // fus(, , ); + // A functional unit reference has three different forms: + // // use fu for 1 cycle + // < > // use fu for cycles + // < : > // use fu for cycles starting at + // + if (is_write) { + std::string fus; + int fu_id = 0; + auto &res_cycles = ref->res_cycles; + auto &start_cycles = ref->start_cycles; + for (const auto &fu_name : ref->func_units()) { + // Currently tablegen defines "res_cycles" as starting at cycle + // 0, even if there is an optional "StartAt" attribute. In MDL, + // we adjust for this so that the counting begins at the first + // cycle the resource is used in. + int start = !start_cycles.empty() ? 
start_cycles[fu_id] : 0;
+            int cycles = !res_cycles.empty() ? res_cycles[fu_id++] : 1;
+
+            std::string fu_cycles;
+            if (start != 0)
+              fu_cycles = formatv("<{0}:{1}>", start + 1, cycles - start);
+            else if (cycles > 1)
+              fu_cycles = formatv("<{0}>", cycles);
+            fus += fu_name + fu_cycles + "&";
+            fu_seen = true;
+          }
+          if (!fus.empty())
+            fus.pop_back(); // delete trailing "&"
+
+          auto flags = ref->FormatFusFlags();
+          if (!fus.empty())
+            out += formatv(" fus({0}, {1}{2});", fus, ref->micro_ops, flags);
+          else if (ref->micro_ops > 0 || !flags.empty())
+            out += formatv(" fus({0}{1});", ref->micro_ops, flags);
+        }
+      }
+    }
+    return false;
+  }
+
+  // Do something reasonable with WriteSequences. There are only two
+  // WriteSequence cases of interest (in all current targets):
+  //   - The repeat count is 1 for several write resources, or
+  //   - The repeat count is > 1 for a single write resource.
+  // If the repeat count is 1, we adjust the latency for each resource to
+  // reflect earlier writes in the sequence. If the repeat count is greater
+  // than 1, we accumulate latencies for the different writes.
+  // Rather than write out duplicate references for the single resource case,
+  // simply pass the repeat count to the underlying object.
+  if (IsWriteSequence(name)) {
+    int latency = 0;
+    bool ifs = false;
+    auto *sequence = write_sequences_.at(name);
+    auto &writes = sequence->writes();
+    for (auto &write : writes)
+      ifs |= FormatRef(out, instr, sched_model, write, operand,
+                       sequence->repeat() * repeat, latency, fu_seen, indent);
+    return ifs;
+  }
+
+  // Generate the if/then/else for variant references for an operand. We
+  // generally want these to look like:
+  //     if <pred1> { <refs> }
+  //     else if <pred2> { <refs> }
+  //     ...
+  //     else { <refs> }
+  // If the SchedVariant is marked as Variadic, then each selected resource
+  // applies to a different variadic operand.
+  if (IsSchedVariant(name)) {
+    std::vector<std::tuple<std::string, std::string, std::string>> cc_refs;
+    std::string cond;
+    bool is_variadic = IsVariadic(name, sched_model);
+
+    for (const auto &var_name : sched_variants_[name]->variants()) {
+      auto *var = sched_vars_[var_name];
+
+      // Generate the defs and uses for this variant's resources. This may
+      // produce conditional references, unconditional references, or
+      // nothing at all.
+      std::string refs, ifs;
+      int variadic_id = 1;
+      for (const auto &select : var->selected()) {
+        std::string item;
+        auto opnd = is_variadic ? "$" + std::to_string(variadic_id++) : operand;
+        int latency = 0;
+        if (FormatRef(item, instr, sched_model, select, opnd, repeat, latency,
+                      fu_seen, indent + " "))
+          ifs += item;
+        else
+          refs += item;
+      }
+      cc_refs.emplace_back(var->predicate(), ifs, refs);
+      if (var->predicate() == "NoSchedPred")
+        break;
+    }
+
+    // Look for trailing entries with empty reference clauses and delete them.
+    // Note that we can't delete empty clauses if there are subsequent else
+    // clauses.
+    for (unsigned idx = cc_refs.size(); idx > 0; --idx) {
+      auto &[pred, ifs, refs] = cc_refs[idx - 1];
+      if (ifs.empty() && refs.empty())
+        cc_refs.pop_back();
+      else
+        break;
+    }
+
+    // Write out the valid if/then/else clauses.
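+    // For example (predicate and operand names are illustrative only), the
+    // emitted MDL can look like:
+    //   if pred_IsTwoCycle { def(E2, $dst); }
+    //   else { def(E1, $dst); }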
+ bool generated_if = false; + for (auto &[pred, ifs, refs] : cc_refs) { + cond += "\n" + indent; + if (generated_if) + cond += "else "; + if (pred != "NoSchedPred") + cond += "if " + PredName(pred) + " "; + cond += "{" + ifs; + if (!refs.empty() && !ifs.empty()) + cond += indent + " "; + cond += refs; + if (!ifs.empty()) + cond += indent; + cond += " }"; + generated_if = true; + } + out += cond + "\n"; + return generated_if; + } + + out += "ERROR(" + name + ")"; + return false; +} + +//---------------------------------------------------------------------------- +// Given an instruction, a SchedModel, and a vector of rw-units, produce +// all the latency references for that instruction. +// Return a tuple: +// - a string containing the formatted defs/uses. +// - the number of microops specified, or 0. +//---------------------------------------------------------------------------- +constexpr bool kWriteUnmentionedOperands = false; + +std::string MachineDescription::FormatReferences(Instruction *instr, + const std::string &sched_model, + const InstRW *inst_rw) { + int reads = 0; + int writes = 0; + int variable_operands = 0; + auto &rw_units = inst_rw->rw_units(); + + //-------------------------------------------------------------------------- + // For each rw_unit, create a set of operand references. + //-------------------------------------------------------------------------- + std::string refs, ifs; + bool fu_seen = false; + for (const auto &unit : rw_units) { + bool is_write = IsWrite(unit, sched_model); + std::string opnd = is_write ? instr->out(writes) : instr->in(reads); + int opid = is_write ? instr->out_index(writes++) : instr->in_index(reads++); + if (opid >= 0 && gen_operand_indexes()) + opnd = std::to_string(opid); + + // If we didn't find a matching operand, create a reference to "extra" + // operands. These look like "$$" in the reference. + if (opnd.empty() && instr->has_variable_ops()) + opnd = "$" + std::to_string(++variable_operands); + std::string out; + int lat = 0; + if (FormatRef(out, instr, sched_model, unit, opnd, 1, lat, fu_seen)) + ifs += out; + else + refs += out; + } + + //-------------------------------------------------------------------------- + // Combine all the ifs with all the refs. Try to combine everything on + // one line if possible. + //-------------------------------------------------------------------------- + auto out = ifs; + if (!refs.empty()) { + if (!ifs.empty()) + out += " " + refs + "\n"; // Indent the refs. + else + out += refs; + } + //-------------------------------------------------------------------------- + // If we didn't see any resource (functional unit) uses, generate an fus + // statement for the whole CPU (assuming we have one of those, which should + // be rare). + //-------------------------------------------------------------------------- + if (!fu_seen && !out.empty()) { + if (sched_model.empty()) + std::cerr << formatv("Warning: no functional unit assignments for {0}\n", + instr->name()); + else + out += + formatv(" fus({0}, 0);", sched_models()[sched_model]->output_name()); + } + + //-------------------------------------------------------------------------- + // Write out all the unmentioned operand references. We don't bother at + // the moment, but we may want to do this explicitly at some point. 
+ //-------------------------------------------------------------------------- + if (kWriteUnmentionedOperands) { + out += "\n"; + for (;;) { + auto write = instr->out(writes++); + if (write.empty()) + break; + if (instr->may_load()) + out += formatv(" def.(LOAD_PHASE, {0});", write); + else + out += formatv(" def.(E1, {0});", write); + } + for (;;) { + auto read = instr->in(reads++); + if (read.empty()) + break; + out += formatv(" use.(E1, {0});", read); + } + } + + //-------------------------------------------------------------------------- + // Return the functional unit, the FU details, and the latency body. + //-------------------------------------------------------------------------- + return out; +} + +//---------------------------------------------------------------------------- +// Format a subunit associated with an instruction and an itinerary. These are +// of the form: +// subunit sub_name(resource r0, r1,...) {{ // resources optional +// use(E1, r0, r1,...); // use any resources +// def(...); use(...); ... +// }} +//---------------------------------------------------------------------------- +std::string MachineDescription::FormatItinSubunit(Instruction *inst, + InstrItineraryData *itin) { + int parameters = itin->resource_args().size(); + + // Generate the resource defs for the subunit. + std::string resource_defs = parameters ? "resource " : ""; + for (int idx = 0; idx < parameters; idx++) + resource_defs += formatv("{0}r{1}", idx ? ", " : "", idx); + + // Generate the resource uses. The common case is that all resources are + // used in the same cycle, so try to combine uses into a single clause. + std::string resource_uses; + int resource_id = 0, phase = -1, time = -1; + for (auto &arg : itin->resource_args()) { + if (phase != arg.phase() || time != arg.cycles()) { + if (resource_id) + resource_uses += "); "; + resource_uses += formatv(" use(E{0}", arg.phase() + 1); + if (arg.cycles() != 1) + resource_uses += formatv(":{0}", arg.cycles()); + } + resource_uses += formatv(", r{0}", resource_id++); + phase = arg.phase(); + time = arg.cycles(); + } + + std::string out = "() {{"; + if (parameters) + out = formatv("({0}) {{{{ {1});", resource_defs, resource_uses); + + // Get descriptors of all the instructions register operands, and generate + // references for each of them. We assume that all registers in a single + // operand are all referenced in the same cycle. + auto operands = GetRegisterOperands(inst); + auto &cycles = itin->operand_cycles(); + int cycles_size = static_cast(cycles.size()); + + // Format defs/uses for each register operand. + for (auto &[type, name, op_id] : operands) { + // Determine what cycle this event takes place in. + auto cycle = op_id >= 0 && op_id < cycles_size ? cycles[op_id] : "0"; + set_max_pipeline_phase(std::stoul(cycle.c_str())); + if (cycle != "0") + cycle = "E" + cycle; + else if (type == "def" && cycles.empty() && op_id >= 0 && inst->may_load()) + cycle = "LOAD_PHASE"; + else + cycle = "E1"; + out += formatv(" {0}({1}, {2});", type, cycle, name); + } + return out + "}}"; +} + +// Write out issue resources for a cpu. +static std::string FormatIssueResources(int num_issue_slots) { + if (num_issue_slots <= 0) + return ""; + std::string out; + for (int idx = 0; idx < num_issue_slots; idx++) + out += formatv(" s{0},", idx); + out.pop_back(); // delete trailing comma + + return formatv(" issue(F1){0};\n", out); +} + +// Provide some CPU-specific default latencies. 
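+// For example, load_latency=4 and high_latency=10 (illustrative values) emit:
+//   protected phases defaults { LOAD_PHASE=4, HIGH_PHASE=10 };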
+static std::string FormatDefaultLatencies(int load_latency, int high_latency) { + std::string out; + if (load_latency > 0) + out = formatv("LOAD_PHASE={0}", load_latency); + if (load_latency > 0 && high_latency > 0) + out += ", "; + if (high_latency > 0) + out += formatv("HIGH_PHASE={0}", high_latency); + if (out.empty()) + return out; + return formatv(" protected phases defaults {{ {0} };\n", out); +} + +//---------------------------------------------------------------------------- +// Generate a string that represents a MDL CPU definition. +//---------------------------------------------------------------------------- +std::string SchedMachineModel::Format(MachineDescription *md) { + if (cpus_.empty()) + return ""; + + // Write out the start of the CPU definition - the name and subtargets. + std::string cpus; + for (auto &cpu : cpus_) + cpus += formatv("\"{0}\", ", cpu); + cpus.pop_back(); + cpus.pop_back(); // delete trailing comma + auto out = formatv("cpu {0}({1}) {{\n", output_name_, cpus); + + // Optionally write out a pipeline specification for the default load latency. + out += FormatDefaultLatencies(load_latency_, high_latency_); + + // Write out issue slot and resource definitions. + if (!has_itineraries()) { + out += FormatIssueResources(issue_width_); + } else { + std::string slots, resources; + for (const auto &res : + md->processor_itineraries()[itineraries()]->resources()) { + if (!md->IsComboUnit(res)) { + auto *resset = md->IsIssueSlot(res) ? &slots : &resources; + if (!resset->empty()) + *resset += ", "; + *resset += res; + } + } + if (!slots.empty()) + out += formatv(" issue(F1) {0};\n", slots); + else + out += FormatIssueResources(issue_width_); + + if (!resources.empty()) + out += formatv(" resource {0};\n", resources); + out += "\n"; + } + + // If the CPU has a reorder buffer, write out a spec for that. + if (micro_op_buffer_size() > 0) + out += formatv(" reorder_buffer<{0}>;\n", micro_op_buffer_size()); + + // Write out schedule-based functional unit instances. + int count = 0; + for (const auto &unit : func_units_) { + auto *func_unit = md->proc_resources()[unit]; + std::string buffer; + for (int idx = 0; idx < func_unit->num_instances(); idx++) { + out += formatv(" func_unit {0} U{1}();\n", + func_unit->super_names()[idx].name(), count++); + } + } + + // Write out itinerary-based functional unit instances. + if (has_itineraries()) { + for (auto *fu : itinerary_fus()) + count = fu->FormatInstance(&out, count, md); + } + + // Write out schedule-based functional unit forwarding graph. + if (md->gen_forwarding_info()) { + std::map forwards; + for (const auto &[tuple, latency] : md->forwarding_network().graph()) { + const auto &[model, def, use] = tuple; + if (model == name_) { + std::string to = formatv("{0}({1})", use, *latency.begin()); + if (!forwards.count(def)) + forwards[def] = to; + else + forwards[def] += "," + to; + } + } + if (!forwards.empty()) + out += "\n"; + for (const auto &[from, to] : forwards) + out += formatv(" forward {0} -> {1};\n", from, to); + } + + return out + "}\n\n"; +} + +//---------------------------------------------------------------------------- +// Generate all permutations of candidate resource set indexes for the +// functional unit instance parameters. 
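+// For example, parameter sizes {2, 3} expand to the index tuples
+// {0,0}, {0,1}, {0,2}, {1,0}, {1,1}, {1,2}.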
+//----------------------------------------------------------------------------
+static void ExpandCombos(std::vector<std::vector<int>> &result,
+                         std::vector<int> &sizes, std::vector<int> &indexes,
+                         unsigned level) {
+  if (level == sizes.size()) {
+    result.push_back(indexes);
+    return;
+  }
+  for (int index = 0; index < sizes[level]; index++) {
+    indexes.push_back(index);
+    ExpandCombos(result, sizes, indexes, level + 1);
+    indexes.pop_back();
+  }
+}
+
+//----------------------------------------------------------------------------
+// Generate a string that represents functional unit instances, and append
+// them to the current output string. Return the number of instances
+// generated.
+//----------------------------------------------------------------------------
+int ItineraryFuncUnit::FormatInstance(std::string *out, int fu_id,
+                                      MachineDescription *md) {
+  auto *itin = itinerary();
+  auto &slots = itin->issue_stages();
+
+  // Given the parameter lists for this itinerary, each of which has a set of
+  // resource specification options, build a permutation of all arguments'
+  // resource sets.
+  std::vector<int> parameter_sizes;
+  parameter_sizes.reserve(itin->resource_args().size());
+  for (auto arg : itin->resource_args())
+    parameter_sizes.push_back(arg.exprs().size());
+
+  std::vector<int> items;
+  std::vector<std::vector<int>> index_set;
+  ExpandCombos(index_set, parameter_sizes, items, 0);
+
+  // Use the generated index sets to create functional unit instances
+  // for each combination of resource specifications.
+  auto &args = itin->resource_args();
+  for (auto &combo : index_set) {
+    std::string resources;
+    for (unsigned arg = 0; arg < combo.size(); arg++)
+      resources += (arg ? ", " : "") + args[arg].exprs()[combo[arg]].ToString();
+    if (slots.empty())
+      *out +=
+          formatv(" func_unit {0}<> U{1}({2});\n", name(), fu_id++, resources);
+    for (auto &slot : slots)
+      *out += formatv(" func_unit {0}<> U{1}({2}) -> {3};\n", name(), fu_id++,
+                      resources, slot.ToString());
+  }
+  return fu_id;
+}
+
+//----------------------------------------------------------------------------
+// Helper function for Functional unit template definitions.
+// Generate a string containing resource names.
+//----------------------------------------------------------------------------
+static std::string FormatResSet(int stages) {
+  std::string out;
+  for (int res_id = 0; res_id < stages; res_id++)
+    out += formatv("r{0}, ", res_id);
+  if (!out.empty()) {
+    out.pop_back();
+    out.pop_back();
+  } // delete comma
+  return out;
+}
+
+//----------------------------------------------------------------------------
+// Write out subunit instances in a functional unit template definition.
+// Write out a few subunits on each line.
+//----------------------------------------------------------------------------
+static std::string FormatSubunitInstances(const std::set<int> &subunits,
+                                          std::string res_set,
+                                          std::string indent = "  ") {
+  std::string out, sub, separator;
+
+  for (auto subunit : subunits) {
+    auto item = formatv("sub{0}({1})", subunit, res_set);
+    if (sub.size() + item.size() + indent.size() + separator.size() >= 73) {
+      out += formatv("{0}subunit {1};\n", indent, sub);
+      sub = item;
+    } else {
+      sub += separator + item;
+    }
+    separator = ",";
+  }
+  if (!sub.empty())
+    out += formatv("{0}subunit {1};", indent, sub);
+  return out;
+}
+
+//----------------------------------------------------------------------------
+// Write out subunit instances in an itinerary-based functional unit template
+// definition. The input is a set of <subunit id, itinerary name> pairs.
+// Each itinerary has a set of functional units it's associated with.
+// Most of the complexity of this function is making the output look pretty.
+//----------------------------------------------------------------------------
+std::string
+FormatSubunitInstances(MachineDescription *md,
+                       const std::set<std::pair<int, std::string>> &subunits,
+                       std::string res_set) {
+  std::string out;
+
+  // Any subunit may be associated with more than one SchedModel, so we
+  // create a map of SchedModel predicates with a set of subunits for
+  // each. Avoid long predicate lines by inserting linefeeds.
+  std::map<std::string, std::set<int>> models_and_subunits;
+  for (const auto &[subunit, itin_name] : subunits) {
+    std::string models, line;
+    auto *itins = md->instr_itinerary_data(itin_name);
+    for (const auto &model : itins->models()) {
+      if (line.size() + model.size() + 4 > 79) {
+        models += line + "\n ";
+        line = "";
+      }
+      line += formatv("{0}, ", md->sched_models()[model]->output_name());
+    }
+    if (!line.empty()) {
+      line.erase(line.length() - 2);
+      line += ": ";
+    }
+    models += line;
+    models_and_subunits[models].insert(subunit);
+  }
+
+  // For each functional unit predicate set, write out its associated subunits.
+  // Note that it's possible to have unpredicated functional units. If there's
+  // only one predicate we don't need to write it out.
+  for (const auto &[models, subunit_ids] : models_and_subunits) {
+    auto pred = models.empty() ? "" : formatv(" {0}{{", models);
+    if (models_and_subunits.size() == 1)
+      pred.clear();
+    auto indent = pred.empty() ? "  " : "    ";
+    auto subs = FormatSubunitInstances(subunit_ids, res_set, indent);
+    if (pred.empty())
+      out += subs + "\n";
+    else if (pred.size() + subs.size() < 75)
+      out += pred + subs + " };\n";
+    else
+      out += pred + "\n" + subs + "\n };\n";
+  }
+  return out;
+}
+
+//----------------------------------------------------------------------------
+// Generate a string that represents an MDL Function Unit Template definition
+// for derived itineraries.
+//----------------------------------------------------------------------------
+std::string ItineraryFuncUnit::FormatTemplate(MachineDescription *md) {
+  std::string out = formatv("func_unit {0} (", name());
+  std::string res_set = FormatResSet(itinerary_->resource_args().size());
+  if (!res_set.empty())
+    out += "resource ";
+  out += formatv("{0}) {{\n", res_set);
+
+  // Write out subunit instances.
+  out += FormatSubunitInstances(md, subunits_, res_set);
+  return out + "}\n\n";
+}
+
+//----------------------------------------------------------------------------
+// Generate a string that represents an MDL Function Unit Template definition.
+//----------------------------------------------------------------------------
+std::string ProcResource::Format(MachineDescription *md) const {
+  // Write out resource groups to define all the names.
+  if (is_resource_group()) {
+    std::string names;
+    auto size = buffer_size() != -1 ? formatv("<{0}>", buffer_size()) : "";
+    for (auto &fu : resource_group_)
+      names += fu + ", ";
+    names.pop_back(), names.pop_back(); // delete trailing comma
+    return formatv("func_group {0}{1}: {2};\n", name_, size, names);
+  }
+
+  // Format the bases. Don't make the line too long.
+  std::string out = formatv("func_unit {0}", name_);
+  std::string bases;
+  int indent = out.size();
+  for (const auto &fu : base_func_units_) {
+    if (bases.size() + indent + fu.size() + 3 > 80) {
+      out += bases + "\n";
+      bases = "          ";
+      indent = 10;
+    }
+    bases += formatv(" : {0}", fu);
+  }
+
+  // Format the subunits for this functional unit, if any.
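+  // With subunits 3 and 7 (illustrative ids) and no bases, the template is
+  // emitted roughly as:
+  //     func_unit ALU0() {
+  //       subunit sub3(),sub7();
+  //     }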
+ std::string subs = FormatSubunitInstances(subunits_, ""); + + // Write out the template if it has bases or subunits. + if (bases.empty() && subs.empty()) + return ""; + + if (!subs.empty()) + return formatv("func_unit {0}{1}() {{\n{2}\n}\n\n", name_, bases, subs); + return formatv("{0}{1}() {{}\n", out, bases); +} + +//--------------------------------------------------------------------------- +// Format a register definition. +//--------------------------------------------------------------------------- +std::string RegDefinition::Format() const { + if (index_ == last_index_) + return name_; + return formatv("{0}[{1}..{2}]", prefix_, index_, last_index_); +} + +//--------------------------------------------------------------------------- +// Format a register class definition. +//--------------------------------------------------------------------------- +std::string RegisterClass::Format() { + if (!registers_.empty()) + std::sort(registers_.begin(), registers_.end()); + std::string out; + for (unsigned i = 0; i < registers_.size(); i++) { + int first, last; + if (!out.empty()) + out += ", "; + std::string prefix = registers_[i].prefix(); + first = registers_[i].index(); + if (first == -1 || i == registers_.size() - 1) { + out += registers_[i].name(); + } else { + for (last = first; i + 1 < registers_.size(); i++) { + if (registers_[i + 1].prefix() != prefix || + registers_[i + 1].index() != last + 1) + break; + last = registers_[i + 1].index(); + } + if (first == last) + out += formatv("{0}{1}", prefix, first); + else + out += formatv("{0}[{1}..{2}]", prefix, first, last); + } + } + return formatv("register_class {0} {{ {1} };\n", name_, out); +} + +//---------------------------------------------------------------------------- +// Generate a string that represents an MDL Operand record. +//---------------------------------------------------------------------------- +std::string Operand::Format() const { + std::string out; + for (const auto &opnd : ops_) { + if (!out.empty()) + out += ", "; + out += opnd; + } + return formatv("operand {0}({1}) {{ type({2}); }\n", name_, out, type_); +} + +//---------------------------------------------------------------------------- +// Generate a string that represents an MDL Instruction record. 
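+// For example (operand and subunit names are illustrative only):
+//   instruction ADDrr(dst(O), src1(I), src2(I)) {
+//     subunit(sub12);
+//   }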
+//---------------------------------------------------------------------------- +std::string Instruction::Format(bool full_definition) const { + constexpr auto ellipsis = "..."; + std::string out, opnds, variable_ops; + + for (const auto &opnd : outs_) { + if (opnd == ellipsis) { + variable_ops = ellipsis; + } else { + if (!opnds.empty()) + opnds += ", "; + opnds += formatv("{0}(O)", opnd); + } + } + for (const auto &opnd : ins_) { + if (opnd == ellipsis) { + variable_ops = ellipsis; + } else { + if (!opnds.empty()) + opnds += ", "; + opnds += formatv("{0}(I)", opnd); + } + } + for (const auto &opnd : implicit_defs_) { + if (!opnds.empty()) + opnds += ", "; + opnds += formatv("{0}(O)", opnd); + } + for (const auto &opnd : implicit_uses_) { + if (!opnds.empty()) + opnds += ", "; + opnds += formatv("{0}(I)", opnd); + } + + if (!opnds.empty() && !variable_ops.empty()) + opnds += ", "; + + out = formatv("instruction {0}({1}{2}) {{", name_, opnds, variable_ops); + if (pseudo_) + out += " // pseudo-instr"; + out += "\n"; + + if (!full_definition) + return out; + + // write out declared subunits + if (HasSubunits()) { + out += " subunit("; + for (auto &unit : *subunit_) { + out += unit + ", "; + } + out.pop_back(); + out.pop_back(); // delete trailing comma + out += ");\n"; + } + + // write out derived subunits + if (!subunits().empty()) { + std::string units; + for (auto &unit : subunits()) + units += formatv("sub{0},", unit); + units.pop_back(); // delete trailing comma + out += formatv(" subunit({0});\n", units); + } + + if (HasChildren()) { + out += " derived("; + for (auto *child : children_) + out += formatv("{0}, ", child->name()); + out.pop_back(); + out.pop_back(); // delete trailing comma + out += ");\n"; + } + + if (!assembly_.empty()) + out += formatv(" // {0}\n", assembly_); + return out + "}\n"; +} + +//---------------------------------------------------------------------------- +// Generate a string that is a CSV representation of an instruction and +// all of its operands. +//---------------------------------------------------------------------------- +std::string Instruction::ToCsvString() const { + std::string out = name_; + + // write out operands - pad them out so the operands all line up + for (auto &opnd : outs_) + out += formatv("\t({0})", opnd); + for (int oc = 1 - outs_.size(); oc > 0; oc--) + out += "\t."; + for (auto const &in : ins_) + out += formatv("\t{0}", in); + for (int ic = 9 - ins_.size(); ic > 0; ic--) + out += "\t."; + + out += formatv("\t assembly:({0})\t pattern:{1}", assembly_, pattern_); + if (!parent_.empty()) + out += formatv("\t parent:{0}", parent_); + + if (HasSubunits()) { + out += "\t subunit:"; + for (auto &unit : *subunit_) + out += formatv(" {0}", unit); + } + + return out + "\n"; +} + +constexpr int kNoEntries = -1; +constexpr char divider[] = + "//---------------------------------------------------------------------\n"; +constexpr char file_banner[] = + "// This file is autogenerated from an LLVM Target Description File.\n"; + +//---------------------------------------------------------------------------- +// Write out a file header. 
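+//
+// For example, with a hypothetical target family "Foo" whose instruction file
+// is named "FooInstructions.mdl", the instruction output starts with the
+// banner comment and "family Foo;", and the architecture output starts with
+// the banner comment and "import "FooInstructions.mdl"".
+//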
+//---------------------------------------------------------------------------- +void MachineDescription::WriteFileHeaders() { + output_inst() << formatv("{0}{1}{0}family {2};\n", divider, file_banner, + family()); + output_arch() << formatv("{0}{1}{0}import \"{2}\"\n", divider, file_banner, + inst_file_name_); +} + +//---------------------------------------------------------------------------- +// Write a header for each generated architecture specification section. +//---------------------------------------------------------------------------- +void MachineDescription::WriteArchHeader(const char *section, int entries) { + if (entries == 0) + return; + auto out = formatv("\n{0}// {1}", divider, section); + if (entries != kNoEntries) + out += formatv(" ({0} entries)", entries); + output_arch() << out + "\n" + divider; +} + +//---------------------------------------------------------------------------- +// Write a header for each instruction description section. +//---------------------------------------------------------------------------- +void MachineDescription::WriteInstHeader(const char *section) { + output_inst() << formatv("\n{0}// {1}\n{0}", divider, section); +} + +//---------------------------------------------------------------------------- +// Dump out pipeline phases. +//---------------------------------------------------------------------------- +void MachineDescription::WritePipelinePhase() { + WriteArchHeader("Pipeline phase definitions", kNoEntries); + int max_phase = 0; + + // Check RWUnit records for latency information. + for (const auto &[name, rw_unit] : rw_units_) + for (const auto &[sched_model, sched_model_info] : + rw_unit->sched_model_info()) + max_phase = std::max(max_phase, sched_model_info.latency); + + // Check Itinerary records for latency information. + for (auto [name, itinerary_data] : instr_itinerary_data_) + for (auto &cycle : itinerary_data->operand_cycles()) + max_phase = std::max(max_phase, std::stoi(cycle, nullptr, 0)); + + if (max_phase == 0) + max_phase = max_pipeline_phase(); + std::string phases = max_phase ? formatv("E[1..{0}]", max_phase + 1) : "E1"; + output_arch() << formatv("protected phases {0} {{ F1, {1} };\n", family(), + phases); +} + +//---------------------------------------------------------------------------- +// Write out all processor variants. +//---------------------------------------------------------------------------- +void MachineDescription::WriteProcessorModel() { + WriteArchHeader("CPU Description Classes", sched_models_.size()); + for (const auto &[name, model] : sched_models_) + output_arch() << model->Format(this); +} + +// ---------------------------------------------------------------------------- +// Write out all functional template definitions. +//---------------------------------------------------------------------------- +void MachineDescription::WriteFUInfo() { + WriteArchHeader("Functional Unit Groups", kNoEntries); + for (const auto &[name, fu] : proc_resources_) + if (fu->is_resource_group()) + output_arch() << fu->Format(this); + output_arch() << "\n"; + + WriteArchHeader("Functional Unit Templates", kNoEntries); + // Write out Schedule-based functional units. + for (const auto &[name, fu] : proc_resources_) + if (!fu->is_resource_group()) + output_arch() << fu->Format(this); + + // Write out itinerary functional unit templates. 
+ for (const auto &[stages, fu] : itinerary_fus_) + output_arch() << fu->FormatTemplate(this); +} + +//---------------------------------------------------------------------------- +// Write out all subunit template definitions. +//---------------------------------------------------------------------------- +void MachineDescription::WriteSubunits() { + WriteArchHeader("Subunit Definitions", subunits_.size()); + +#ifndef WRITE_SUBUNITS_FOR_EACH_SCHED_MODEL + for (const auto &[subunit, index] : subunits_) + output_arch() << formatv("subunit sub{0}{1}\n", index, subunit); + output_arch() << "\n"; +#else + for (auto &[model_name, model] : sched_models_) { + output_arch() << formatv("\n// Subunits for {0}\n", model_name); + for (auto &[subunit, index] : subunits_) + if (sched_model_subunits_[index] == model_name) + output_arch() << formatv("subunit sub{0}{1}\n", index, subunit); + } +#endif + + for (const auto &[base, index] : subunit_bases_) + output_arch() << formatv("subunit base{0}{1}() {{}\n", index, base); +} + +//---------------------------------------------------------------------------- +// Write out all latency template definitions. +//---------------------------------------------------------------------------- +void MachineDescription::WriteLatencies() { + WriteArchHeader("Latency Definitions", latencies_.size()); + for (const auto &[latency, idx] : latencies_) + output_arch() << formatv("latency lat{0}() {{{1}}\n", idx, latency); +} + +static std::string PredIndent(int indent) { + return std::string(indent * 2 + 2, ' '); +} + +static bool IsMultiLine(std::string &pred) { + if (!str_contains(pred, "\n")) + return false; + if (pred.size() > 70) + return true; + pred.erase(std::remove(pred.begin(), pred.end(), '\n'), pred.end()); + return false; +} + +//---------------------------------------------------------------------------- +// Functions managing whether predicates should be written out. +//---------------------------------------------------------------------------- +bool MachineDescription::IsPredicateReferenced(const std::string &name) { + if (IsMCSchedPredicate(name)) + return mc_sched_predicates()[name]->is_referenced(); + if (IsMCInstPredicate(name)) + return mc_inst_predicates()[name]->is_referenced(); + if (IsMCStatement(name)) + return statements()[name]->is_referenced(); + if (IsMCOpcodeSwitchCase(name)) + return switch_cases()[name]->is_referenced(); + return true; +} + +// Mark the predicate expression as referenced. +void MachineDescription::SetPredicateReferenced(const std::string &name) { + if (IsMCSchedPredicate(name)) + mc_sched_predicates()[name]->set_is_referenced(); + if (IsMCInstPredicate(name)) + mc_inst_predicates()[name]->set_is_referenced(); + if (IsMCStatement(name)) + statements()[name]->set_is_referenced(); + if (IsMCOpcodeSwitchCase(name)) + switch_cases()[name]->set_is_referenced(); +} + +bool MachineDescription::OutputPredicate(const std::string &name) { + if (name == "NoSchedPred") + return false; + return name != kTrue && name != kFalse && + (!str_contains(name, "anonymous_") || IsPredicateReferenced(name)); +} + +//---------------------------------------------------------------------------- +// When writing out predicate names, avoid writing out anonymous names. +// If it's an MCSchedPredicate, return its base predicate. 
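+//
+// For example, given a hypothetical anonymous MCSchedPredicate name
+// "anonymous_123" that wraps the named instruction predicate "IsLoadPred",
+// PredName returns "IsLoadPred" and marks that predicate as referenced so it
+// is written out with the other predicate definitions.
+//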
+//---------------------------------------------------------------------------- +std::string MachineDescription::PredName(const std::string &name) { + if (str_contains(name, "anonymous_") && IsMCSchedPredicate(name)) + return PredName(mc_sched_predicates()[name]->inst_predicate()); + + SetPredicateReferenced(name); + return name; +} + +//---------------------------------------------------------------------------- +// Functions for writing out predicate expressions. In general we'd like these +// to look nice, and approximately what they looked like in the original +// tablegen source. +//---------------------------------------------------------------------------- + +// Format a named predicate reference. +std::string MachineDescription::FormatPred(const std::string &name, + int indent) { + std::string out; + if (!str_contains(name, "anonymous_")) + return name; + if (IsMCInstPredicate(name)) + out = mc_inst_predicates()[name]->Format(this, indent); + else if (IsMCStatement(name)) + out = statements()[name]->Format(this, indent); + else if (IsMCOpcodeSwitchCase(name)) + out = switch_cases()[name]->Format(this, indent); + return out; +} + +// Format an Instruction Predicate Object. +std::string MCInstPredicate::Format(MachineDescription *md, int indent) const { + std::string out; + + // Handle predicates that handle sets of opcodes. + if (!valid_opcodes_.empty()) { + auto sep = ""; + for (auto &op : valid_opcodes_) { + out += formatv("{0}{1}", sep, op); + sep = ","; + } + return formatv("{0}<{1}>", kCheckOpcode, out); + } + + // Handle compound predicates (CheckAny, CheckAll, CheckNot). + if (!predicates_.empty()) { + auto sep = ""; + out += formatv("{0}<", attributes_[0]); + for (auto &pred : predicates_) { + out += formatv("{0}\n{1}", sep, PredIndent(indent + 1)); + out += md->FormatPred(pred, indent + 1); + sep = ","; + } + return out + ">"; + } + + // Handle statement predicates. + if (!statement_.empty()) + return out + md->FormatPred(statement_, indent); + + if (attributes_.empty()) + return ""; + + // Handle all the rest (CheckIsRegOperand, etc) + std::string op = attributes_[0]; + + // CheckRegOperand always has 2 required parameters, and an optional + // function_mapper. + if (op == kCheckRegOperand) { + if (!function_mapper_.empty()) + out = formatv(", {0}", function_mapper_); + return formatv("{0}<{1},{2}{3}>", op, opindex_, register_name_, out); + } + // CheckImmOperand has an optional "value" parameter and an optional + // function_mapper. If the value parameter is missing, but we have an + // function_mapper, pass an empty value parameter. + if (op == kCheckImmOperand) { + if (!immval_.empty()) + out = formatv(", {0}", immval_); + else if (!function_mapper_.empty()) + out = ", \"\""; + if (!function_mapper_.empty()) + out += formatv(", {0}", function_mapper_); + return formatv("{0}<{1}{2}>", op, opindex_, out); + } + if (op == kCheckZeroOperand || op == kCheckIsRegOperand || + op == kCheckIsImmOperand || op == kCheckInvalidRegOperand) { + return formatv("{0}<{1}>", op, opindex_); + } + if (op == kCheckSameRegOperand) { + return formatv("{0}<{1},{2}>", op, opindex_, immval_); + } + if (op == kCheckFunctionPredicate) { + return formatv("{0}<{1},{2}>", op, function_name_, function_mapper_); + } + if (op == kCheckFunctionPredicateWithTII) { + return formatv("{0}<{1},{2},{3}>", op, function_name_, function_mapper_, + immval_); + } + if (op == kCheckNumOperands) { + return formatv("{0}<{1}>", op, immval_); + } + // By default, we don't pass any parameters. 
+ return formatv("{0}<>", op); +} + +// Format an MCStatement predicate object. +std::string MCStatement::Format(MachineDescription *md, int indent) const { + std::string out; + if (!predicate_.empty()) { + auto pred = md->FormatPred(predicate_, indent + 1); + if (IsMultiLine(pred)) + pred = formatv("\n{0}{1}", PredIndent(indent + 1), pred); + return out + + formatv("\n{0}{1}<{2}>", PredIndent(indent), kReturnStatement, pred); + } + + out += formatv("{0}<\n", kOpcodeSwitchStmt); + for (auto &item : cases_) + out += formatv("{0},", md->FormatPred(item, indent + 1)); + if (!default_.empty()) + out += md->FormatPred(default_, indent + 1); + return out + ">"; +} + +// Format an OpcodeSwitchCase predicate object. +std::string MCOpcodeSwitchCase::Format(MachineDescription *md, + int indent) const { + std::string out = PredIndent(indent); + + out += formatv("{0}<\n{1}[", kOpcodeSwitchCase, PredIndent(indent + 1)); + auto sep = ""; + for (auto &opcode : cases_) { + out += formatv("{0}{1}", sep, opcode); + sep = ","; + } + out += "],"; + if (!case_stmt_.empty()) { + out += md->FormatPred(case_stmt_, indent + 2); + } + return out + ">"; +} + +//---------------------------------------------------------------------------- +// Write out all predicate definitions. +//---------------------------------------------------------------------------- +void MachineDescription::WritePredicates() { + WriteArchHeader("Predicate Definitions", kNoEntries); + + for (const auto &[name, predicate] : mc_inst_predicates_) + if (OutputPredicate(name)) { + auto pred = predicate->Format(this, 0); + if (IsMultiLine(pred)) + pred = formatv("\n{0}{1}", PredIndent(0), pred); + output_arch() << formatv("\npredicate {0} : {1};\n", name, pred); + } + + for (const auto &[name, predicate] : mc_sched_predicates_) { + if (OutputPredicate(name)) { + auto pred = FormatPred(predicate->inst_predicate(), 0); + if (IsMultiLine(pred)) + pred = formatv("\n{0}{1}", PredIndent(0), pred); + output_arch() << formatv("\npredicate {0} : {1};\n", name, pred); + } + } + + for (const auto &[name, predicate] : sched_predicates_) { + if (OutputPredicate(name)) { + auto pred = FormatPred(predicate->predicate(), 0); + if (IsMultiLine(pred)) + pred = formatv("\n{0}{1}", PredIndent(0), pred); + output_arch() << formatv("\npredicate {0} : [{{{1}}];\n", name, pred); + } + } + + for (const auto &[name, predicate] : statements_) + if (OutputPredicate(name)) { + auto pred = predicate->Format(this, 0); + if (IsMultiLine(pred)) + pred = formatv("\n{0}{1}", PredIndent(0), pred); + output_arch() << formatv("\npredicate {0} : {1};\n", name, pred); + } + + for (const auto &[name, predicate] : switch_cases_) + if (OutputPredicate(name)) { + auto pred = predicate->Format(this, 0); + if (IsMultiLine(pred)) + pred = formatv("\n{0}{1}", PredIndent(0), pred); + output_arch() << formatv("\npredicate {0} : {1};\n", name, pred); + } + + output_arch() << "\n"; +} + +//---------------------------------------------------------------------------- +// Dump out register sets. +//---------------------------------------------------------------------------- +void MachineDescription::WriteRegisterSets() { + FindRegisterSets(); + + WriteInstHeader("Register definitions"); + for (const auto ® : register_sets_) + output_inst() << formatv("register {0};\n", reg.Format()); +} + +//---------------------------------------------------------------------------- +// Write out all referenced register classes. 
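+//
+// Classes are printed in coalesced form (see RegisterClass::Format); for
+// instance, a hypothetical class GPR containing R0-R7 and SP is emitted as:
+//
+//   register_class GPR { R[0..7], SP };
+//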
+//---------------------------------------------------------------------------- +void MachineDescription::WriteRegisterClasses() { + WriteInstHeader("Register Classes"); + for (const auto &[name, regclass] : register_class_list_) { + GetMemberList(regclass); + if (regclass->referenced()) + output_inst() << regclass->Format(); + } +} + +//---------------------------------------------------------------------------- +// Write out all referenced operand templates. +//---------------------------------------------------------------------------- +void MachineDescription::WriteOperandsMdl() { + WriteInstHeader("Operand Definitions"); + for (const auto &[name, opnd] : operands_) + if (opnd->referenced()) + output_inst() << opnd->Format(); +} + +//---------------------------------------------------------------------------- +// Write out all instruction definitions. +//---------------------------------------------------------------------------- +void MachineDescription::WriteInstructionsMdl() { + WriteInstHeader("Instruction Definitions"); + + if (!ignore_subunits()) { + // Write out instructions sorted by subunit, then by instruction name. + std::map> subunit_map; + for (const auto &[name, ins] : instructions_) + if (ins->HasSubunits()) + subunit_map[(*ins->subunit_)[0]].push_back(ins); + + for (const auto &[name, instruct_set] : subunit_map) + for (auto *ins : instruct_set) + output_inst() << ins->Format(true); + } else { + // Write out all instructions whether or not they have subunits. + for (const auto &[name, ins] : instructions_) + output_inst() << ins->Format(true); + } +} + +//---------------------------------------------------------------------------- +// Write out CSV entries for each instruction. +//---------------------------------------------------------------------------- +void MachineDescription::WriteInstructionsCsv() { + std::cout << "-------------- Instructions --------------" << std::endl; + for (const auto &[name, ins] : instructions_) + ins->WriteCsv(); +} + +} // namespace scan +} // namespace mdl +} // namespace mpact diff --git a/llvm/utils/TdScan/register.cpp b/llvm/utils/TdScan/register.cpp new file mode 100644 --- /dev/null +++ b/llvm/utils/TdScan/register.cpp @@ -0,0 +1,315 @@ +//===- register.cpp - Process tablegen register definitions ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Process register definitions in the tablegen file. We capture register +// definitions and register class definitions. 
Register classes can use
+// expressions to specify class members, so we need to process these
+// expressions:
+//   expr : '(' 'add' expr (',' expr)* ')'
+//        | '(' 'sub' expr (',' expr)* ')'
+//        | '(' 'and' expr (',' expr)* ')'
+//        | '(' 'shl' expr ',' num ')'
+//        | '(' 'rotl' expr ',' num ')'
+//        | '(' 'rotr' expr ',' num ')'
+//        | '(' 'trunc' expr ',' num ')'
+//        | '(' 'sequence' "prefix", first, last ')'
+//        | '(' 'decimate' expr ',' num ')'
+//        | register_name
+//        | register_class_name
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <cctype>
+#include <cstdio>
+#include <cstring>
+
+#include "scan.h"
+
+namespace mpact {
+namespace mdl {
+namespace scan {
+
+using DagExprFunc = RegSet (MachineDescription::*)(char *&);
+
+struct DagExpressions {
+  const char *operation;
+  RegSet (MachineDescription::*func)(char *&);
+};
+
+DagExpressions DagExpr[] = {
+    {"add", &MachineDescription::RegClassAdd},
+    {"sub", &MachineDescription::RegClassSub},
+    {"and", &MachineDescription::RegClassAnd},
+    {"shl", &MachineDescription::RegClassShl},
+    {"rotl", &MachineDescription::RegClassRotl},
+    {"rotr", &MachineDescription::RegClassRotr},
+    {"trunc", &MachineDescription::RegClassTrunc},
+    {"sequence", &MachineDescription::RegClassSequence},
+    {"decimate", &MachineDescription::RegClassDecimate},
+    {nullptr, nullptr},
+};
+
+//----------------------------------------------------------------------------
+// Parse a MemberList expression and generate the specified register list.
+//----------------------------------------------------------------------------
+RegSet MachineDescription::ParseExpr(char *&input) {
+  RegSet regs;
+
+  if (strlen(input) == 0)
+    return regs;
+  for (; *input && isspace(*input); input++) {
+  }
+
+  // If we have a subexpression, look up the operator and perform the function.
+  if (input[0] == '(') {
+    char *name = input + 1;
+    int len = strcspn(input, " )");
+    if (input[len] == ' ') {
+      input[len] = 0;
+      input += len + 1;
+      for (int i = 0; DagExpr[i].operation != nullptr; i++)
+        if (!strcmp(name, DagExpr[i].operation))
+          return (this->*DagExpr[i].func)(input);
+      return regs;
+    }
+    // If this is an expression of the form (op), just return an empty set.
+    input += len + 1;
+    return regs;
+  }
+
+  // If the expression isn't a subexpression, parse the name.
+  int len = strcspn(input, " ,)");
+  char delim = input[len];
+  input[len] = 0;
+  std::string name = input;
+  *(input += len) = delim;
+
+  // If this is a register class, add all the members to the return set.
+  if (register_class_list_.count(name))
+    return GetMemberList(register_class_list_[name]);
+  // If it's a register, add just the one register.
+  if (register_dict_.count(name))
+    regs.emplace_back(name);
+  // If it's neither a register nor a register class, just return an empty
+  // list. We may want to enhance this for register class tuples at some
+  // point.
+  return regs;
+}
+
+// Process an add operator: '(' 'add' expr (',' expr)* ')'.
+RegSet MachineDescription::RegClassAdd(char *&input) {
+  RegSet regs;
+  for (;;) {
+    for (auto &item : ParseExpr(input))
+      if (std::find(regs.begin(), regs.end(), item) == regs.end())
+        regs.push_back(item);
+    if (input[0] != ',')
+      break;
+    input++;
+  }
+  input = strchr(input, ')') + 1;
+  return regs;
+}
+
+// Process a sub operator: '(' 'sub' expr (',' expr)* ')'.
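+// For example (with hypothetical register names), the member list
+// "(sub (add R0, R1, R2, R3), R2)" yields { R0, R1, R3 }.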
+RegSet MachineDescription::RegClassSub(char *&input) { + auto regs = ParseExpr(input); + while (input[0] == ',') { + auto sub = ParseExpr(++input); + + for (auto &item : sub) { + auto found = std::find(regs.begin(), regs.end(), item); + if (found != regs.end()) + regs.erase(found, found + 1); + } + } + input = strchr(input, ')') + 1; + return regs; +} + +// Process an and operator: '(' 'and' , ) ')'. +RegSet MachineDescription::RegClassAnd(char *&input) { + auto rega = ParseExpr(input); + if ((input[0] != ',')) + return rega; + auto regb = ParseExpr(++input); + input = strchr(input, ')') + 1; + + RegSet result; + for (auto &item : rega) + if (std::find(regb.begin(), regb.end(), item) != regb.end()) + result.push_back(item); + return result; +} + +// Process a shl operator: '(' 'shl' ',' ')'. +RegSet MachineDescription::RegClassShl(char *&input) { + auto regs = ParseExpr(input); + unsigned count; + if (sscanf(input, ",%u)", &count) != 1) + return regs; + input = strchr(input, ')') + 1; + + if (count > regs.size()) + count = regs.size(); + regs.erase(regs.begin(), regs.begin() + count); // erase first N regs + return regs; +} + +// Process a rotl operator: '(' 'rotl' ',' ')'. +RegSet MachineDescription::RegClassRotl(char *&input) { + auto regs = ParseExpr(input); + unsigned count; + if (sscanf(input, ",%u)", &count) != 1) + return regs; + input = strchr(input, ')') + 1; + + if (count >= regs.size()) + return regs; + auto rot = regs; + regs.erase(regs.begin(), regs.begin() + count); // erase the first N regs + rot.resize(count); // truncate to first N regs + regs.insert(regs.end(), rot.begin(), rot.end()); // concatenate them + return regs; +} + +// Process a rotr operator: '(' 'rotr' ',' ')'. +RegSet MachineDescription::RegClassRotr(char *&input) { + auto regs = ParseExpr(input); + unsigned count; + if (sscanf(input, ",%u)", &count) != 1) + return regs; + input = strchr(input, ')') + 1; + + if (count >= regs.size()) + return regs; + count = regs.size() - count; // N = size - count + + auto rot = regs; + regs.erase(regs.begin(), regs.begin() + count); // erase the first N regs + rot.resize(count); // truncate to first N regs + regs.insert(regs.end(), rot.begin(), rot.end()); // Concatenate them + return regs; +} + +// Process a trunc operator: '(' 'trunc' ',' ')'. +RegSet MachineDescription::RegClassTrunc(char *&input) { + auto regs = ParseExpr(input); + unsigned count; + if (sscanf(input, ",%u)", &count) != 1) + return regs; + input = strchr(input, ')') + 1; + + if (count < regs.size()) + regs.resize(count); // truncate to first n regs + return regs; +} + +// Process a decimate operator: '(' 'decimate' ',' ')'. 
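+// For example (with hypothetical register names), the member list
+// "(decimate (add R0, R1, R2, R3), 2)" keeps every second register and
+// yields { R0, R2 }.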
+RegSet MachineDescription::RegClassDecimate(char *&input) { + auto regs = ParseExpr(input); + RegSet decimated; + + unsigned count; + if (sscanf(input, ",%u)", &count) != 1) + return regs; + input = strchr(input, ')') + 1; + + for (unsigned i = 0; i < regs.size(); i += count) + decimated.push_back(regs[i]); + return decimated; +} + +//---------------------------------------------------------------------------- +// Parse and generate a sequence of registers: +// '(' 'sequence "format", , ')' +//---------------------------------------------------------------------------- +RegSet MachineDescription::RegClassSequence(char *&input) { + RegSet regs; + char format[100]; + unsigned first, last; + if (sscanf(input, "\"%s %u,%u)", format, &first, &last) != 3) + return regs; + if (last < first) + return regs; + input = strchr(input, ')') + 1; + + if (auto *quote = strchr(format, '"')) { + *quote = 0; // strip off trailing quote + for (unsigned id = first; id <= last; id++) { + char reg_name[100]; + snprintf(reg_name, sizeof(reg_name), format, id); + regs.emplace_back(reg_name); + } + } + return regs; +} + +//---------------------------------------------------------------------------- +// Scan a register class member list, collect all the registers. +// If its register set is empty, go populate the class. +//---------------------------------------------------------------------------- +RegSet MachineDescription::GetMemberList(RegisterClass *reg_class) { + if (reg_class->parsed()) + return reg_class->registers(); + reg_class->set_parsed(); + + if (reg_class->registers().empty()) + if (char *members = reg_class->member_list()) + reg_class->set_registers(ParseExpr(members)); + + return reg_class->registers(); +} + +//---------------------------------------------------------------------------- +// Given a register name, find its prefix and its (optional) index (suffix). +// This will aid in finding logical sets of registers (in FindRegisterSets()). +//---------------------------------------------------------------------------- +std::string GetRegisterPrefix(const std::string ®, int ®id) { + // Find trailing digits. + unsigned suffix = reg.length(); + while (std::isdigit(reg.at(suffix - 1))) + suffix--; + + // if there's no suffix, return -1 for index, and the whole name. + if (suffix == reg.length()) { + regid = -1; + return reg; + } + + // But don't include leading 0s in the suffix (like R03), since this would + // confuse our coalescing efforts (turning R0, R1, R2 into R[0-2]). + for (; reg.at(suffix) == '0' && suffix + 1 < reg.length(); suffix++) { + } + + regid = std::stoi(reg.substr(suffix, reg.length()).c_str(), nullptr, 10); + return reg.substr(0, suffix); +} + +//---------------------------------------------------------------------------- +// Given the set of defined register names, discover register classes. +// Combine adjacent register definitions that have identical prefixes and +// sequential suffixes. Note: This is purely for readability of the output. +//---------------------------------------------------------------------------- +void MachineDescription::FindRegisterSets() { + // If a register can be combined with the previously defined register + // (same prefix, sequential suffixes), combine them. Otherwise add + // the new register to the list. 
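+  // For example, hypothetical definitions R0, R1, R2, SP, R4 coalesce into
+  // the sets R[0..2], SP, and R4 (SP has no numeric suffix, so it starts a
+  // new run).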
+ for (auto const ® : registers_) { + if (!register_sets_.empty() && register_sets_.back().index() != -1 && + reg.prefix() == register_sets_.back().prefix() && reg.index() != -1 && + reg.index() == register_sets_.back().last_index() + 1) + register_sets_.back().set_last_index(reg.index()); + else + register_sets_.push_back(reg); + } +} + +} // namespace scan +} // namespace mdl +} // namespace mpact diff --git a/llvm/utils/TdScan/scan.h b/llvm/utils/TdScan/scan.h new file mode 100644 --- /dev/null +++ b/llvm/utils/TdScan/scan.h @@ -0,0 +1,1838 @@ +//===- scan.h - Definitions for scanning tablegen files -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Read through an llvm table-gen-generated "reports.txt" input file, and +// compile a hash of instructions and their attributes (operands, syntax, +// etc), operand definitions, and register/register class definitions. +// +// Since the input file is machine-generated, we assume the file is (very) +// well-formed, but still do some simple sanity checks to avoid crashes. +// +// For each instruction we find, we collect: +// - its name. +// - its output operand names and types. +// - its input operand names and types. +// - its assembly formatting rule. +// - its llvm matching pattern. +// - its parent instruction (if present). +// - its subunit identifier. +// +// We also look for register definitions, and register class definitions. +// +// Currently we write out a file suitable for importing into a spreadsheet. +// For building outsize of blaze: clang++ scan.cc -o scan +// +//===----------------------------------------------------------------------===// + +#ifndef TDSCAN_SCAN_H_ +#define TDSCAN_SCAN_H_ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "llvm/Support/FormatVariadic.h" + +namespace mpact { +namespace mdl { +namespace scan { + +//------------------------------------------------------------------------- +// Classes that hold the scraped architecture model. +//------------------------------------------------------------------------- +class MachineDescription; +class ProcessorModel; +struct SchedModelRef; +class ProcResource; +class FuncComboData; +class SchedMachineModel; +class SchedReadWrite; +class SchedAlias; + +class SchedVariant; +class SchedVar; +class SchedPredicate; +class MCSchedPredicate; +class MCInstPredicate; +class MCStatement; +class MCOpcodeSwitchCase; + +class WriteSequence; +class InstRW; +class ItinRW; + +class ProcessorItineraries; +class InstrItineraryData; +class InstrStage; +class ItineraryFuncUnit; + +class LatencyInfo; + +class Instruction; +class Operand; +class RegDefinition; +class RegisterClass; + +using StringSet = std::set; +using StringVec = std::vector; + +// Represent a vector of operand references, including a reference type, +// a string representing the operand reference, and the original operand id. +using OperandRefs = std::vector>; + +//------------------------------------------------------------------------- +// Formatting and string search functions. 
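+//
+// The formatv() wrapper below just forwards to llvm::formatv and returns a
+// std::string, so (with made-up arguments) a call looks like:
+//
+//   std::string s = formatv("func_unit {0}: {1};", "ALU", 2);
+//   // s == "func_unit ALU: 2;"
+//
+// Note that "{{" in a format string produces a literal '{'.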
+//------------------------------------------------------------------------- +template +inline std::string formatv(const char *fmt, Ts &&...vals) { + return std::string(llvm::formatv(fmt, vals...)); +} + +inline bool str_contains(const std::string &str, const char *search) { + return strstr(str.c_str(), search); +} + +//------------------------------------------------------------------------- +// We don't like attributes with ? in them, just return an empty string. +//------------------------------------------------------------------------- +inline std::string FixAttribute(const std::string &attribute) { + return (str_contains(attribute, "?")) ? "" : attribute; +} + +//------------------------------------------------------------------------- +// The definition of a CPU class defined in tablegen files. +// A cpu record in tablegen files contains the following relevant +// information: cpu name, and the schedule model. The member `func_units_` +// are the functional units supported by cpu. +//------------------------------------------------------------------------- +class ProcessorModel { +public: + ProcessorModel(const std::string &name, const std::string &sched_model) + : name_(name), sched_model_(sched_model) {} + + // Accessor methods. + const std::string &name() const { return name_; } + const std::string &sched_model() const { return sched_model_; } + void set_issue_width(int iw) { issue_width_ = iw; } + + // Print Utilities. + std::string ToString(MachineDescription *md) const; + void Dump(MachineDescription *md) { std::cout << ToString(md); } + +private: + int issue_width_ = -1; + std::string name_; + std::string sched_model_; +}; + +//------------------------------------------------------------------------- +// The definition of a machine model class defined in tablegen files. +// A single machine-model record in tablegen files contains the following +// relevant information: issue-width, and the default load latency. 
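+// The tablegen name is also cleaned up for output (see fix_up_names below):
+// for example, hypothetical names "SwiftModel" and "CortexA8Itineraries"
+// become "Swift" and "CortexA8", and "NoSchedModel" is replaced by the
+// processor family name.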
+//------------------------------------------------------------------------- +class SchedMachineModel { +public: + SchedMachineModel(const std::string name, const std::string &issue_width, + const std::string &load_latency, + const std::string &high_latency, int micro_op_buffer_size, + int mispredict_penalty, const std::string &itineraries, + const std::string family) + : name_(name), output_name_(name), issue_width_(std::stoi(issue_width)), + load_latency_(std::stoi(load_latency)), + high_latency_(std::stoi(high_latency)), + micro_op_buffer_size_(micro_op_buffer_size), + mispredict_penalty_(mispredict_penalty), itineraries_(itineraries) { + fix_up_names(family); + } + + SchedMachineModel(const std::string name, const std::string &itineraries, + const std::string family) + : name_(name), output_name_(name), itineraries_(itineraries) { + fix_up_names(family); + } + + void fix_up_names(const std::string &family) { + // Fix up the model name to be more user-friendly + if (output_name_ == "NoSchedModel") + output_name_ = family; + if (itineraries_ == "NoItineraries") + itineraries_ = ""; + size_t itin = output_name_.find("Itineraries"); + if (itin != std::string::npos) + output_name_.erase(itin, 11); + size_t model = output_name_.find("Model"); + if (model != std::string::npos) + output_name_.erase(model, 5); + } + + int issue_width() const { return issue_width_; } + int load_latency() const { return load_latency_; } + int high_latency() const { return high_latency_; } + int micro_op_buffer_size() const { return micro_op_buffer_size_; } + int mispredict_penalty() const { return mispredict_penalty_; } + + std::string name() const { return name_; } + const std::string &output_name() const { return output_name_; } + std::set &cpus() { return cpus_; } + void add_cpu(std::string cpu) { cpus_.insert(cpu); } + StringSet &func_units() { return func_units_; } + void add_func_unit(const std::string &unit) { func_units_.insert(unit); } + bool has_itineraries() const { return !itineraries().empty(); } + const std::string &itineraries() const { return itineraries_; } + + std::vector &itinerary_fus() { return itinerary_fus_; } + void add_itinerary_func_unit(ItineraryFuncUnit *unit) { + if (std::find(itinerary_fus_.begin(), itinerary_fus_.end(), unit) == + itinerary_fus_.end()) + itinerary_fus_.push_back(unit); + } + + // Print Utilities. + std::string ToString(const MachineDescription *md) const; + void Dump(MachineDescription *md) { std::cout << ToString(md); } + std::string Format(MachineDescription *md); + +private: + std::string name_; // name scraped from tablegen + std::string output_name_; // name for the output file + int issue_width_ = 1; // default issue width is 1 + int load_latency_ = 1; // default load_latency is 1 + int high_latency_ = 1; // default high latency is 1 + int micro_op_buffer_size_ = 0; // default micro-op buffer size + int mispredict_penalty_ = 0; // default misprediction penalty + std::string itineraries_; + + // derived information + StringSet func_units_; + std::vector itinerary_fus_; + std::set cpus_; // which CPUs use this model +}; + +//------------------------------------------------------------------------- +// SuperNames are used to build names of functional units which are +// superunits in the tablegen file. 
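+// For example (hypothetical unit names), SuperName("P0") followed by
+// add_name("M0") and add_name("D0") produces the name "P0:M0:D0" with
+// num_units() == 2.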
+//------------------------------------------------------------------------- +class SuperName { +public: + explicit SuperName(const std::string &name) : name_(name), num_units_(0) {} + bool operator<(const SuperName &rhs) const { + if (num_units_ != rhs.num_units_) + return num_units_ < rhs.num_units_; + return name_ < rhs.name_; + } + std::string &name() { return name_; } + int num_units() const { return num_units_; } + void add_name(std::string name) { + name_ += ":" + name; + num_units_++; + } + +private: + std::string name_; // appended names of subunits + int num_units_; +}; + +//------------------------------------------------------------------------- +// The definition of a functional-unit class defined in tablegen files. +// A single functional unit record in tablegen files contains the following +// relevant information: functional unit name, and number of units. The member +// `subunits_` are the sub-units supported by the functional unit. +//------------------------------------------------------------------------- +class ProcResource { +public: + ProcResource(const std::string &name, const std::string &num_instances, + const std::string &buffer_size, std::string sched_model, + std::vector group_names, std::string fu_super, + bool is_proc_res_group) + : name_(name), num_instances_(std::stoi(num_instances)), + buffer_size_(std::stoi(buffer_size)), sched_model_(sched_model), + group_names_(group_names), fu_super_(fu_super), + is_proc_res_group_(is_proc_res_group) {} + + // Accessor methods. + const std::string &name() const { return name_; } + int num_instances() const { return num_instances_; } + int buffer_size() const { return buffer_size_; } + const std::string &sched_model() { return sched_model_; } + bool is_proc_res_group() { return is_proc_res_group_; } + const std::string &fu_super() { return fu_super_; } + std::vector &group_names() { return group_names_; } + bool is_resource_group() const { return !group_names_.empty(); } + StringSet &resource_group() { return resource_group_; } + + StringVec &base_func_units() { return base_func_units_; } + void add_base_unit(const std::string &func_unit) { + base_func_units_.push_back(func_unit); + } + StringVec &child_func_units() { return child_units_; } + void add_child_unit(const std::string &func_unit) { + child_units_.push_back(func_unit); + } + + void add_subunit(int subunit) { subunits_.insert(subunit); } + void SortSubunits(); + std::vector &super_names() { return super_names_; } + + // Print Utilities. + std::string ToString(MachineDescription *md) const; + void Dump(MachineDescription *md) { std::cout << ToString(md); } + std::string Format(MachineDescription *md) const; + +private: + std::string name_; + unsigned num_instances_; // how many instances + int buffer_size_; // resource buffering parameter + std::string sched_model_; // associated sched model + StringVec group_names_; + std::string fu_super_; + bool is_proc_res_group_ = false; + + std::vector super_names_; + StringVec child_units_; // units that name this unit as Super. + + // derived information about the processor + StringVec base_func_units_; + StringSet resource_group_; + std::set subunits_; // which subunits are supported. +}; + +//------------------------------------------------------------------------- +// For a ProcReadAdvance tablegen record, the class is used to store the +// latency and schedule-write resource list. 
+//------------------------------------------------------------------------- +struct ProcReadAdvance { + std::set valid_writes; + int latency; + std::string sched_model; +}; + +//------------------------------------------------------------------------- +// For a ProcWriteResources tablegen record, the class is used to store the +// latency and functional unit list. +//------------------------------------------------------------------------- +struct ProcWriteResources { + std::set proc_resources; + int latency; + std::string sched_model; +}; + +//------------------------------------------------------------------------- +// For a ProcWriteResources tablegen record, the class is used to store the +// latency and functional unit list. For a ProcReadAdvance tablegen record, the +// class is used to store the latency and valid writes information. +//------------------------------------------------------------------------- +struct SchedModelRef { + int latency; + int micro_ops; + bool is_begin_group; + bool is_end_group; + bool is_single_issue; + bool retire_ooo; + std::vector names; + std::vector res_cycles; + std::vector start_cycles; + + std::vector &func_units() { return names; } + std::vector &valid_writes() { return names; } + + // Format add Functional unit reference attributes for a reference. + std::string FormatFusFlags() { + std::string out; + if (is_begin_group) + out += ", BeginGroup"; + if (is_end_group) + out += ", EndGroup"; + if (is_single_issue) + out += ", SingleIssue"; + if (retire_ooo) + out += ", RetireOOO"; + return out; + } +}; + +//------------------------------------------------------------------------- +// Every SchedReadWrite object contains a dictionary of SchedModelRef's +// indexed by a Model name. +//------------------------------------------------------------------------- +using SchedModelDict = std::map; + +//------------------------------------------------------------------------- +// The definition of a SchedReadWrite record defined in tablegen files. +// A SchedReadWrite record in tablegen files contains the following relevant +// information: name, schedule-model name, latency, and a list of +// functional-unit names where the Read or Write can be executed. Note that +// the same SchedReadWrite name can appear for multiple schedule models, in +// which case we need to keep track of the functional units and corresponding +// latency for each schedule model. +//------------------------------------------------------------------------- +class SchedReadWrite { +public: + SchedReadWrite(const std::string &name, const std::string &sched_model, + const std::vector &func_units, + const std::vector &res_cycles, + const std::vector &start_cycles, + const std::string &latency, const std::string µ_ops, + bool is_write, bool is_begin_group, bool is_end_group, + bool is_single_issue, bool retire_ooo) + : name_(name), is_write_(is_write) { + AddSchedModel(sched_model, func_units, res_cycles, start_cycles, latency, + micro_ops, is_write, is_begin_group, is_end_group, + is_single_issue, retire_ooo); + } + + void AddSchedModel(const std::string &sched_model, + const std::vector &func_units, + const std::vector &res_cycles, + const std::vector &start_cycles, + const std::string &latency, const std::string µ_ops, + bool is_write, bool is_begin_group, bool is_end_group, + bool is_single_issue, bool retire_ooo); + + // Accessor methods. 
+ const std::string &name() const { return name_; } + SchedModelDict &sched_model_info() { return sched_model_info_; } + bool is_write() const { return is_write_; } + bool is_read() const { return !is_write_; } + + SchedModelRef *HasModel(const std::string &model) { + if (sched_model_info_.count(model)) + return &(sched_model_info_[model]); + return nullptr; + } + + // Print Utilities. + std::string ToString(MachineDescription *md, const std::string &model, + bool all_info) const; + void Dump(MachineDescription *md, bool all_info) { + std::cout << ToString(md, "", all_info); + } + +private: + std::string name_; + SchedModelDict sched_model_info_; + bool is_write_; +}; + +//------------------------------------------------------------------------- +// For each SchedVariant tablegen record, the class is used to store the +// predicate (which is some C code) and a list of SchedReadWrite. +//------------------------------------------------------------------------- +struct PredicatedVariants { + PredicatedVariants(const std::string &fn, const std::vector &rwu) + : code(fn), rw_units(rwu) {} + std::string code; + std::vector rw_units; +}; + +//------------------------------------------------------------------------- +// The definition of a SchedVariant record defined in tablegen files. +//------------------------------------------------------------------------- +class SchedVariant { +public: + SchedVariant(const std::string &name, const std::string &sched_model, + const std::vector &variants, bool is_write, + bool is_variadic) + : name_(name), sched_model_(sched_model), variants_(variants), + is_write_(is_write), is_variadic_(is_variadic) {} + + // Accessor Functions. + const std::vector &variants() const { return variants_; } + const std::string &sched_model() const { return sched_model_; } + const std::string &name() const { return name_; } + bool is_write() const { return is_write_; } + bool is_read() const { return !is_write_; } + bool is_variadic() const { return is_variadic_; } + + // Print Utilities. + std::string ToString(MachineDescription *md, std::string prefix) const; + void Dump(MachineDescription *md) { std::cout << ToString(md, ""); } + +private: + std::string name_; + std::string sched_model_; + std::vector variants_; + bool is_write_ = false; + bool is_variadic_ = false; +}; + +//------------------------------------------------------------------------- +// The definition of a SchedVar record defined in tablegen files. +//------------------------------------------------------------------------- +class SchedVar { +public: + SchedVar(const std::string &name, const std::string &predicate, + const std::vector &selected) + : name_(name), predicate_(predicate), selected_(selected) {} + + // Accessor Functions. + const std::vector &selected() const { return selected_; } + const std::string &predicate() const { return predicate_; } + const std::string &name() const { return name_; } + + // Print Utilities. + std::string ToString(MachineDescription *md, std::string prefix) const; + void Dump(MachineDescription *md) { std::cout << ToString(md, ""); } + +private: + std::string name_; + std::string predicate_; + std::vector selected_; +}; + +//---------------------------------------------------------------------------- +// Names that correspond to LLVM predicate operators. 
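+//
+// These names are matched against attributes scraped from tablegen predicate
+// records and re-emitted by MCInstPredicate::Format; for example a scraped
+// CheckRegOperand record with operand index 1 and register X0 (hypothetical
+// values) is written back out as "CheckRegOperand<1,X0>".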
+//---------------------------------------------------------------------------- +constexpr auto kCheckAny = "CheckAny"; +constexpr auto kCheckAll = "CheckAll"; +constexpr auto kCheckNot = "CheckNot"; +constexpr auto kCheckOpcode = "CheckOpcode"; +constexpr auto kCheckIsRegOperand = "CheckIsRegOperand"; +constexpr auto kCheckRegOperand = "CheckRegOperand"; +constexpr auto kCheckInvalidRegOperand = "CheckInvalidRegOperand"; +constexpr auto kCheckSameRegOperand = "CheckSameRegOperand"; +constexpr auto kCheckIsImmOperand = "CheckIsImmOperand"; +constexpr auto kCheckImmOperand = "CheckImmOperand"; +constexpr auto kCheckZeroOperand = "CheckZeroOperand"; +constexpr auto kCheckFunctionPredicate = "CheckFunctionPredicate"; +constexpr auto kCheckFunctionPredicateWithTII = "CheckFunctionPredicateWithTII"; +constexpr auto kCheckNumOperands = "CheckNumOperands"; +constexpr auto kOpcodeSwitchStmt = "OpcodeSwitchStatement"; +constexpr auto kOpcodeSwitchCase = "OpcodeSwitchCase"; +constexpr auto kReturnStatement = "ReturnStatement"; +constexpr auto kName = "Name"; +constexpr auto kNumber = "Number"; +constexpr auto kString = "String"; +constexpr auto kCode = "Code"; +constexpr auto kTrue = "TruePred"; +constexpr auto kFalse = "FalsePred"; +constexpr auto kEmpty = "Empty"; + +//------------------------------------------------------------------------- +// The definition of a SchedPredicate record defined in tablegen files. +//------------------------------------------------------------------------- +class SchedPredicate { +public: + SchedPredicate(const std::string &name, const std::string &sched_model, + const std::string &predicate) + : name_(name), sched_model_(sched_model), predicate_(predicate) {} + + // Accessor Functions. + const std::string &predicate() const { return predicate_; } + const std::string &sched_model() const { return sched_model_; } + const std::string &name() const { return name_; } + + // Print Utilities. + std::string ToString(MachineDescription *md) const; + void Dump(MachineDescription *md) const { std::cout << ToString(md); } + +private: + std::string name_; + std::string sched_model_; + std::string predicate_; +}; + +//------------------------------------------------------------------------- +// The definition of a MCSchedPredicate record defined in tablegen files. +//------------------------------------------------------------------------- +class MCSchedPredicate { +public: + MCSchedPredicate(const std::string &name, const std::string &sched_model, + const std::string &inst_predicate) + : name_(name), sched_model_(sched_model), + inst_predicate_(inst_predicate) {} + + // Accessor Functions. + const std::string &inst_predicate() const { return inst_predicate_; } + const std::string &sched_model() const { return sched_model_; } + const std::string &name() const { return name_; } + bool is_referenced() const { return is_referenced_; } + void set_is_referenced() { is_referenced_ = true; } + + // Print Utilities. + std::string ToString(MachineDescription *md) const; + void Dump(MachineDescription *md) const { std::cout << ToString(md); } + +private: + std::string name_; + std::string sched_model_; + std::string inst_predicate_; + bool is_referenced_ = false; // was this referenced by name +}; + +//------------------------------------------------------------------------- +// The definition of a MCInstPredicate record defined in tablegen files. 
+//------------------------------------------------------------------------- +class MCInstPredicate { +public: + MCInstPredicate(const std::string &name, const std::string &function_name, + const std::string function_mapper, const std::string opindex, + const std::string immval, + const std::vector &predicates, + const std::vector &valid_opcodes, + const std::string ®ister_name, + const std::vector &attributes, + const std::string &statement) + : name_(name), function_name_(function_name), + function_mapper_(function_mapper), opindex_(opindex), immval_(immval), + predicates_(predicates), valid_opcodes_(valid_opcodes), + register_name_(register_name), attributes_(attributes), + statement_(statement) {} + + // Accessor Functions. + const std::string &function_name() const { return function_name_; } + const std::string &name() const { return name_; } + + // Print Utilities. + std::string ToString(MachineDescription *md, int indent) const; + void Dump(MachineDescription *md) const { std::cout << ToString(md, 0); } + std::string Format(MachineDescription *md, int indent) const; + bool is_referenced() const { return is_referenced_; } + void set_is_referenced() { is_referenced_ = true; } + +private: + std::string name_; + std::string function_name_; + std::string function_mapper_; + std::string opindex_; + std::string immval_; + std::vector predicates_; + std::vector valid_opcodes_; + std::string register_name_; + std::vector attributes_; + std::string statement_; + bool is_referenced_ = false; // was this referenced by name +}; + +//------------------------------------------------------------------------- +// The definition of a MCStatement record defined in tablegen files. +//------------------------------------------------------------------------- +class MCStatement { +public: + MCStatement(const std::string &name, const std::string attribute, + const std::string &predicate, + const std::vector &cases, + const std::string &default_case) + : name_(name), attribute_(attribute), cases_(cases), + predicate_(predicate), default_(default_case) {} + + bool is_referenced() const { return is_referenced_; } + void set_is_referenced() { is_referenced_ = true; } + + // Print Utilities. + std::string ToString(MachineDescription *md, int indent) const; + void Dump(MachineDescription *md) const { std::cout << ToString(md, 0); } + std::string Format(MachineDescription *md, int indent) const; + +private: + std::string name_; + std::string attribute_; + std::vector cases_; + std::string predicate_; + std::string default_; + bool is_referenced_ = false; // was this referenced by name +}; + +//------------------------------------------------------------------------- +// The definition of a MCOpcodeSwitchCase record defined in tablegen files. +//------------------------------------------------------------------------- +class MCOpcodeSwitchCase { +public: + MCOpcodeSwitchCase(const std::string &name, + const std::vector &cases, + const std::string &case_stmt) + : name_(name), cases_(cases), case_stmt_(case_stmt) {} + + bool is_referenced() const { return is_referenced_; } + void set_is_referenced() { is_referenced_ = true; } + + // Print Utilities. 
+ std::string ToString(MachineDescription *md, int indent) const; + void Dump(MachineDescription *md) const { std::cout << ToString(md, 0); } + std::string Format(MachineDescription *md, int indent) const; + +private: + std::string name_; + std::vector cases_; + std::string case_stmt_; + bool is_referenced_ = false; // was this referenced by name +}; + +//------------------------------------------------------------------------- +// The definition of a WriteSequence record defined in tablegen files. +//------------------------------------------------------------------------- +class WriteSequence { +public: + WriteSequence(const std::string &name, const std::string &sched_model, + const std::vector &writes, int repeat) + : name_(name), sched_model_(sched_model), writes_(writes), + repeat_(repeat) {} + + // Accessor Functions. + const std::vector &writes() const { return writes_; } + int repeat() const { return repeat_; } + const std::string &name() const { return name_; } + std::string &sched_model() { return sched_model_; } + + // Print Utilities. + std::string ToString(MachineDescription *md) const; + void Dump(MachineDescription *md) const { std::cout << ToString(md); } + +private: + std::string name_; + std::string sched_model_; + std::vector writes_; + int repeat_; +}; + +//------------------------------------------------------------------------- +// The definition of a SchedAlias record defined in tablegen files. +//------------------------------------------------------------------------- +class SchedAlias { +public: + SchedAlias(const std::string &name, const std::string &sched_model, + const std::string &match_rw, const std::string &alias_rw) + : name_(name), sched_model_(sched_model), match_rw_(match_rw), + alias_rw_(alias_rw) {} + + // Accessor Functions. + const std::string &sched_model() const { return sched_model_; } + const std::string &match_rw() const { return match_rw_; } + const std::string &alias_rw() const { return alias_rw_; } + const std::string &name() const { return name_; } + + // Print Utilities. + std::string ToString(MachineDescription *md) const; + void Dump(MachineDescription *md) const { std::cout << ToString(md); } + +private: + std::string name_; + std::string sched_model_; + std::string match_rw_; + std::string alias_rw_; +}; + +//------------------------------------------------------------------------- +// Forwarding information for an InstRW record. Each entry in a +// ForwardUnits set is a tuple of FUs and a latency, and an optional predicate. +// An entry in a ForwardSet is an operand id, a latency, a definition +// functional unit, a use functional unit, and an optional predicate. +// For each potentially forwarded operand, we have a graph of FU pairs +// that support forwarding. +//------------------------------------------------------------------------- +using ForwardUnits = std::set>; +using ForwardItem = std::tuple; +using ForwardSet = std::set; + +// Describe the overall forwarding graph for a processor. Its implementation +// is an array indexed by SchedModel, DefFU, and UseFU, and the integer +// represents the latency between the two functional units (for the specified +// model). +using ForwardGraph = + std::map, std::set>; + +class ForwardingNetwork { +public: + ForwardingNetwork() {} + // Add an edge to the graph indicating the forwarding latency between the + // two units. We need to find the minimum latency between the two units, + // while ignoring negative latencies. 
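+  // For example (hypothetical model and unit names):
+  //   net.AddEdge("ModelA", "FU0", "FU1", 2);
+  //   net.AddEdge("ModelA", "FU0", "FU1", 4);
+  //   net.GetEdge("ModelA", "FU0", "FU1");   // yields the set {2, 4}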
+  void AddEdge(const std::string &model, const std::string &def_unit,
+               const std::string &use_unit, int latency) {
+    auto tup = std::make_tuple(model, def_unit, use_unit);
+    graph_[tup].insert(latency);
+  }
+  std::set<int> GetEdge(const std::string &model, const std::string &def_unit,
+                        const std::string &use_unit) {
+    auto tup = std::make_tuple(model, def_unit, use_unit);
+    if (!graph_.count(tup))
+      return {};
+    return graph_[tup];
+  }
+  ForwardGraph &graph() { return graph_; }
+
+private:
+  ForwardGraph graph_;
+};
+
+//-------------------------------------------------------------------------
+// The definition of an InstRW record defined in tablegen files.
+//-------------------------------------------------------------------------
+class InstRW {
+public:
+  InstRW(const std::string &name, const std::vector<std::string> &rw_units,
+         const std::string &sched_model,
+         const std::vector<std::string> &instregex,
+         const std::vector<std::string> &instrs)
+      : name_(name), rw_units_(rw_units), sched_model_(sched_model),
+        instregex_(instregex), instrs_(instrs) {}
+
+  // Accessor Functions.
+  const std::vector<std::string> &rw_units() const { return rw_units_; }
+  const std::string &name() const { return name_; }
+  const std::string &sched_model() const { return sched_model_; }
+  const std::vector<std::string> &instregex() const { return instregex_; }
+  std::vector<std::string> &instrs() { return instrs_; }
+  ForwardSet &forwarding_info() { return forwarding_info_; }
+
+  // Note which functional units this InstRW applies to.
+  std::set<std::string> &func_units() { return func_units_; }
+  void add_func_unit(std::string name) { func_units_.insert(name); }
+
+  // Print Utilities.
+  std::string ToString(const MachineDescription *md, bool full) const;
+  std::string FwdString(const MachineDescription *md) const;
+  void Dump(MachineDescription *md, bool full) const {
+    std::cout << ToString(md, full);
+  }
+  void DumpFwd(MachineDescription *md) const { std::cout << FwdString(md); }
+
+private:
+  std::string name_;
+  std::vector<std::string> rw_units_;
+  std::string sched_model_;
+  std::set<std::string> func_units_; // Which FUs does it run on
+  std::vector<std::string> instregex_;
+  std::vector<std::string> instrs_;
+  ForwardSet forwarding_info_;
+};
+
+//-------------------------------------------------------------------------
+// The definition of an ItinRW record defined in tablegen files. This is
+// apparently only used for ARM.
+//-------------------------------------------------------------------------
+class ItinRW {
+public:
+  ItinRW(std::string name, const std::vector<std::string> &itin_classes,
+         const std::vector<std::string> &rw_units, std::string sched_model)
+      : name_(name), itin_classes_(itin_classes), rw_units_(rw_units),
+        sched_model_(sched_model) {}
+
+  const std::string &name() const { return name_; }
+  const std::vector<std::string> &itin_classes() const {
+    return itin_classes_;
+  }
+  const std::vector<std::string> &rw_units() const { return rw_units_; }
+  const std::string &sched_model() const { return sched_model_; }
+
+  std::string ToString(const MachineDescription *md) const;
+  void Dump(MachineDescription *md) const { std::cout << ToString(md); }
+
+private:
+  std::string name_;
+  std::vector<std::string> itin_classes_;
+  std::vector<std::string> rw_units_;
+  std::string sched_model_;
+};
+
+//-------------------------------------------------------------------------
+// The definition of a ProcessorItineraries record defined in tablegen files.
+//------------------------------------------------------------------------- +class ProcessorItineraries { +public: + ProcessorItineraries(const std::string name, const StringSet &resources, + const std::vector &bypass, + const StringSet &iid) + : name_(name), resources_(resources), bypass_(bypass), iid_(iid) {} + + // Accessor Functions. + const StringSet &resources() const { return resources_; } + const std::vector &bypass() const { return bypass_; } + const StringSet &iid() const { return iid_; } + + // Print Utilities. + std::string ToString(const MachineDescription *md) const; + void Dump(MachineDescription *md) const { std::cout << ToString(md); } + +private: + std::string name_; + StringSet resources_; + std::vector bypass_; + StringSet iid_; +}; + +//------------------------------------------------------------------------ +// Object representing a single resource expression node. +// Note: we depend on the order of these operators. +//------------------------------------------------------------------------ +enum class ResOp { kNop, kRes, kAnd, kOr }; + +class ResExpr { +public: + ResExpr() : op_(ResOp::kNop) {} + explicit ResExpr(ResOp op) : op_(op) {} + explicit ResExpr(std::string resource) + : op_(ResOp::kRes), resource_(resource) {} + ResExpr(ResOp opcode, std::vector &operands) + : op_(opcode), opnds_(operands) { + std::sort(opnds_.begin(), opnds_.end()); + } + ResExpr(ResOp opcode, std::vector &units) : op_(opcode) { + for (auto &unit : units) + opnds_.emplace_back(unit); + std::sort(opnds_.begin(), opnds_.end()); + } + ResExpr(ResOp opcode, ResExpr &lhs, ResExpr &rhs) : op_(opcode) { + if (lhs.op() == opcode) + opnds_ = lhs.opnds_; + else + opnds_.push_back(lhs); + if (rhs.op() == opcode) + opnds_.insert(opnds_.end(), rhs.opnds_.begin(), rhs.opnds_.end()); + else + opnds_.push_back(rhs); + std::sort(opnds_.begin(), opnds_.end()); + } + + ResOp op() const { return op_; } + std::vector &opnds() { return opnds_; } + std::string resource() const { return resource_; } + bool isRes() const { return op_ == ResOp::kRes; } + bool isOr() const { return op_ == ResOp::kOr; } + bool isAnd() const { return op_ == ResOp::kAnd; } + bool isNop() const { return op_ == ResOp::kNop; } + + std::string ToString(const std::vector &expr, + std::string sep) const { + std::string out; + for (auto &item : expr) + out += (out.empty() ? 
"" : sep) + item.ToString(); + return out; + } + + bool operator==(const ResExpr &rhs) const { + if (op_ != rhs.op()) + return false; + if (op_ == ResOp::kRes) + return resource_ == rhs.resource(); + if (opnds_.size() != rhs.opnds_.size()) + return false; + for (unsigned idx = 0; idx < opnds_.size(); idx++) + if (opnds_[idx] != rhs.opnds_[idx]) + return false; + return true; + } + + bool operator!=(const ResExpr &rhs) const { return !(*this == rhs); } + + bool operator<(const ResExpr &rhs) const { + if (op_ < rhs.op()) + return true; + if (op_ == ResOp::kRes) + return resource_ < rhs.resource(); + for (unsigned idx = 0; idx < rhs.opnds_.size(); idx++) { + if (idx == opnds_.size()) + return true; + if (opnds_[idx] < rhs.opnds_[idx]) + return true; + if (opnds_[idx] > rhs.opnds_[idx]) + return false; + } + if (opnds_.size() > rhs.opnds_.size()) + return false; + return true; + } + bool operator>(const ResExpr &rhs) const { return rhs < *this; } + + std::string ToString() const { + if (isRes()) + return resource_; + if (isOr()) + return ToString(opnds_, " | "); + if (isAnd()) + return ToString(opnds_, " & "); + return "nop"; + } + +private: + ResOp op_; // what kind of op is this + std::string resource_; // name of resource + std::vector opnds_; // operands of this operation +}; + +//------------------------------------------------------------------------ +// Represent a single functional unit template argument. +//------------------------------------------------------------------------ +class ResArg { +public: + ResArg(ResExpr expr, int phase, int cycles) + : exprs_(1, expr), phase_(phase), cycles_(cycles) {} + int cycles() const { return cycles_; } + int phase() const { return phase_; } + std::vector &exprs() { return exprs_; } + void set_expr(ResExpr &expr) { exprs_[0] = expr; } + std::string ToString() const { + std::string out = formatv(" phase={0} cycles={1} :", phase_, cycles_); + for (auto &expr : exprs_) + out += expr.ToString() + " || "; + out = out.substr(0, out.size() - 4); + return out; + } + +private: + std::vector exprs_; // set of alternative resource specifications + int phase_, cycles_; // what phase is it used in, for now long +}; + +//------------------------------------------------------------------------- +// The definition of an InstrItinData record defined in tablegen files. +//------------------------------------------------------------------------- +class InstrItineraryData { +public: + InstrItineraryData(const std::string name, + const std::string &instr_itinerary_class, + int num_micro_ops, const std::vector &stages, + const std::vector &operand_cycles, + const std::vector &bypasses) + : name_(name), instr_itinerary_class_(instr_itinerary_class), + num_micro_ops_(num_micro_ops), stages_(stages), + operand_cycles_(operand_cycles), bypasses_(bypasses) {} + + // Accessor Functions. 
+ const std::string &name() const { return name_; } + const std::string &instr_itinerary_class() const { + return instr_itinerary_class_; + } + std::vector &stages() { return stages_; } + const std::vector &bypasses() const { return bypasses_; } + const std::vector &operand_cycles() const { + return operand_cycles_; + } + int num_micro_ops() const { return num_micro_ops_; } + void add_model(std::string model) { models_.insert(model); } + const std::set &models() { return models_; } + + std::vector &issue_stages() { return issue_stages_; } + void set_issue_stage(ResExpr expr) { issue_stages_.push_back(expr); } + std::vector &resource_args() { return resource_args_; } + void set_resource_args(std::vector &args) { resource_args_ = args; } + + // Print Utilities. + std::string ToString(MachineDescription *md) const; + void Dump(MachineDescription *md) { std::cout << ToString(md); } + +private: + std::string name_; + std::string instr_itinerary_class_; + int num_micro_ops_; + std::vector stages_; + std::vector operand_cycles_; + std::vector bypasses_; + std::set models_; // Models that use this itindata + + std::vector issue_stages_; // alternative issue slot expressions + std::vector resource_args_; // set of template arguments +}; + +//------------------------------------------------------------------------- +// The definition of an InstrStage record defined in tablegen files. +//------------------------------------------------------------------------- +class InstrStage { +public: + InstrStage(const std::string name, int cycles, const StringVec &func_units, + int timeinc) + : name_(name), cycles_(cycles), func_units_(func_units), + timeinc_(timeinc) {} + + // Accessor Functions. + const std::string &name() const { return name_; } + StringVec &func_units() { return func_units_; } + int cycles() const { return cycles_; } + int timeinc() const { return timeinc_; } + int cycle_increment() const { return (timeinc_ == -1) ? cycles_ : timeinc_; } + ResExpr &resource_expr() { return resource_expr_; } + void set_resource_expr(ResExpr expr) { resource_expr_ = expr; } + + void clear_issue_flag() { is_issue_stage_ = false; } + void set_issue_flag() { is_issue_stage_ = true; } + bool is_issue_stage() const { return is_issue_stage_; } + + // Print Utilities. + std::string ToString(const MachineDescription *md) const; + void Dump(MachineDescription *md) const { std::cout << ToString(md); } + +private: + std::string name_; // Tablegen name of stage + int cycles_; // number of cycles resource is used + StringVec func_units_; // Set of alternative resources used + int timeinc_; // time increment to next stage (-1, 0, or +) + ResExpr resource_expr_; // expression represented by this stage + bool is_issue_stage_ = false; // true if only accessing slot resources +}; + +//------------------------------------------------------------------------- +// The definition of a functional unit defined for some set of itineraries. 
+//------------------------------------------------------------------------- +class ItineraryFuncUnit { +public: + ItineraryFuncUnit(std::string &name, InstrItineraryData *itinerary) + : name_(name), itinerary_(itinerary) {} + + std::string name() const { return name_; } + std::vector &stages() { return itinerary_->stages(); } + std::set> &subunits() { return subunits_; } + InstrItineraryData *itinerary() const { return itinerary_; } + + void add_subunit(int subunit, InstrItineraryData *itin) { + subunits_.insert({subunit, itin->name()}); + } + int FormatInstance(std::string *out, int fu_id, MachineDescription *md); + std::string FormatTemplate(MachineDescription *md); + +private: + std::string name_; + InstrItineraryData *itinerary_; // pointer to itinerary data + std::set> subunits_; +}; + +//------------------------------------------------------------------------- +// Instruction describes an instruction definition in the input file. +// An instruction def record is a collection of field definitions in an +// arbitrary order. We scan the entire record looking for particular entries. +// +// def { // ... +// field = ... +// int = ... +// string = "..." +// dag = (ins ...) +// dag = (outs ...) +// list SubUnits = ["subunit"]; +// list ... +// bit = ... +// ... +// } +//------------------------------------------------------------------------- +class Instruction { +public: + Instruction(const std::string &name, std::vector outs, + std::vector ins, std::string assembly, + std::string pattern, std::string parent, + std::vector *subunit, + std::vector rw_units, std::vector uses, + std::vector defs, std::string base_instr, + std::string itinerary, bool may_load, bool pseudo, bool generic) + : name_(name), outs_(outs), ins_(ins), assembly_(assembly), + pattern_(pattern), subunit_(subunit), rw_units_(rw_units), + parent_(parent), base_instr_(base_instr), implicit_uses_(uses), + implicit_defs_(defs), itinerary_(itinerary), may_load_(may_load), + pseudo_(pseudo), generic_(generic) { + if (itinerary_ == "NoItinerary") + itinerary_ = ""; + } + + std::string ToCsvString() const; + void WriteCsv() { std::cout << ToCsvString(); } + std::string Format(bool full_definition) const; + + std::string ToString(MachineDescription *md); + void Dump(MachineDescription *md) { std::cout << ToString(md); } + + bool HasSubunits() const { return subunit_ != nullptr && !subunit_->empty(); } + bool HasChildren() const { return !children_.empty(); } + + static std::string opnd_type(std::string opnd) { + return opnd.substr(0, opnd.find_first_of(' ')); + } + static std::string opnd_name(std::string opnd) { + return opnd.substr(opnd.find_last_of(' ') + 1); + } + std::vector &ins() { return ins_; } + std::vector &outs() { return outs_; } + std::vector &flattened_ins() { return flattened_ins_; } + std::vector &flattened_outs() { return flattened_outs_; } + std::vector &implicit_uses() { return implicit_uses_; } + std::vector &implicit_defs() { return implicit_defs_; } + bool has_variable_ops() const { + return !ins_.empty() && ins_.back() == "..."; + } + + std::string opnd_name(unsigned index) const { + if (index < flattened_outs_.size()) + return opnd_name(flattened_outs_[index]); + index -= flattened_outs_.size(); + if (index < flattened_ins_.size()) + return opnd_name(flattened_ins_[index]); + return ""; + } + + // Return the symbolic operand name for the nth output operand. 
+ std::string out(unsigned index) const { + if (index < flattened_outs_.size()) + return opnd_name(flattened_outs_[index]); + if (index - flattened_outs_.size() < implicit_defs_.size()) + return implicit_defs_[index - flattened_outs_.size()]; + return ""; + } + // Return the symbolic operand name for the nth input operand. + std::string in(unsigned index) const { + if (index < flattened_ins_.size()) + return opnd_name(flattened_ins_[index]); + if (index - flattened_ins_.size() < implicit_uses_.size()) + return implicit_uses_[index - flattened_ins_.size()]; + return ""; + } + // Return the flattened operand index for the nth output operand. + int out_index(unsigned index) const { + if (index < flattened_outs_.size()) + return index; + if (index - flattened_outs_.size() < implicit_defs_.size()) + return flattened_ins_.size() - has_variable_ops() + index; + return -1; + } + // Return the flattened operand index for the nth input operand. + int in_index(unsigned index) const { + unsigned ins_size = flattened_ins_.size() - has_variable_ops(); + if (index < ins_size) + return index + flattened_outs_.size(); + if (index - ins_size < implicit_uses_.size()) + return flattened_outs_.size() + implicit_defs_.size() + index; + return -1; + } + + std::string name() const { return name_; } + std::vector children() { return children_; } + // When adding InstRWs to instructions, only the first item associated with + // a particular SchedModel is valid. + void add_inst_rw(InstRW *item) { + if (!inst_rws_.count(item->sched_model())) { + inst_rws_[item->sched_model()] = item; + } + } + + void add_subunit(int subunit_id) { subunits_.insert(subunit_id); } + const std::set &subunits() const { return subunits_; } + + const std::vector &rw_units() { return rw_units_; } + std::map &inst_rws() { return inst_rws_; } + std::string itinerary() const { return itinerary_; } + bool may_load() const { return may_load_; } + +public: + std::string name_; // name of instruction + std::vector outs_; // output operands + std::vector ins_; // input operands + std::vector flattened_outs_; // flattened output operands + std::vector flattened_ins_; // flattened input operands + std::string assembly_; // assembly syntax + std::string pattern_; // dag pattern that this matches + std::vector *subunit_; // subunit identifier(s) + std::vector rw_units_; // rw_units specified in instr + std::map inst_rws_; // InstRWs for this model/instr + + std::string parent_; // this is a copy of another instruction + std::vector children_; + std::string base_instr_; + std::vector implicit_uses_, implicit_defs_; + std::string itinerary_; + bool may_load_; // true if instruction may do a load + bool pseudo_; // true if this instruction is marked as a pseudo instruction + bool generic_; // true if this is a "generic" instruction + + // derived information + std::set subunits_; // subunits for this instruction. +}; + +//------------------------------------------------------------------------- +// An Operand captures an Operand definition record. +// def { // DAGOperand ... +// ValueType Type = ...; +// dag MIOperandInfo = (ops ..."; +// ... 
+// } +//------------------------------------------------------------------------- +class Operand { +public: + Operand(const std::string &name, std::vector ops, + const std::string &type) + : name_(name), ops_(ops), type_(type) {} + + std::string Format() const; + + bool referenced() const { return referenced_; } + void set_referenced() { referenced_ = true; } + std::vector &ops() { return ops_; } + +private: + std::string name_; // operand name + std::vector ops_; // set of components + std::string type_; // "type" of operand (ala I32) + bool referenced_ = false; // true if any instructions use it +}; + +//------------------------------------------------------------------------- +// Definitions for registers and register classes. +//------------------------------------------------------------------------- +using RegDefinitions = std::set; +using RegSet = std::vector; + +std::string GetRegisterPrefix(const std::string ®, int ®id); + +// Definition of a single register. +// We extract the prefix and index so that we can do reasonable sorting. +class RegDefinition { +public: + explicit RegDefinition(std::string name) : name_(name) { + prefix_ = GetRegisterPrefix(name, index_); + last_index_ = index_; + } + RegDefinition() {} + + std::string name() const { return name_; } + std::string prefix() const { return prefix_; } + int index() const { return index_; } + int last_index() const { return last_index_; } + void set_last_index(int index) { last_index_ = index; } + + std::string Format() const; + bool operator<(const RegDefinition &rhs) const { + return prefix() < rhs.prefix() || + (prefix() == rhs.prefix() && index() < rhs.index()); + } + bool operator>(const RegDefinition &rhs) const { return rhs < *this; } + bool operator==(const RegDefinition &rhs) const { + return rhs.name_ == this->name_; + } + +private: + std::string name_; + std::string prefix_; + int index_; + int last_index_; +}; + +//------------------------------------------------------------------------- +// The definition of a register class defined in tablegen files. +//------------------------------------------------------------------------- +class RegisterClass { +public: + RegisterClass(const std::string &name, const char *members) + : name_(name), member_list_(nullptr) { + if (members) { + member_list_ = new char[strlen(members) + 1]; + memcpy(member_list_, members, strlen(members) + 1); + } + } + + std::string Format(); + void dump() { std::cout << Format(); } + + bool referenced() const { return referenced_; } + void set_referenced() { referenced_ = true; } + const RegSet ®isters() const { return registers_; } + void set_registers(RegSet regs) { registers_ = regs; } + char *member_list() const { return member_list_; } + bool parsed() const { return parsed_; } + void set_parsed() { parsed_ = true; } + +private: + std::string name_; // name of register class + RegSet registers_; // Order can matter for these (because of trunc) + char *member_list_; // Expression that defines class members. + bool referenced_ = false; // true if any instructions/operands use it + bool parsed_ = false; // true if we've parsed the member expression +}; + +//-------------------------------------------------------------------------- +// Container for all the information we scrape from tablegen files. 
+//-------------------------------------------------------------------------- +class MachineDescription { +public: + MachineDescription(std::string family, std::string output_name, + bool ignore_subunits, bool no_warnings, + bool gen_subunit_bases, bool gen_forwarding_info, + bool gen_operand_indexes) + : family_(family), output_name_(output_name), + ignore_subunits_(ignore_subunits), no_warnings_(no_warnings), + gen_subunit_bases_(gen_subunit_bases), + gen_forwarding_info_(gen_forwarding_info), + gen_operand_indexes_(gen_operand_indexes) {} + + std::string family() const { return family_; } + bool ignore_subunits() const { return ignore_subunits_; } + bool no_warnings() const { return no_warnings_; } + bool gen_subunit_bases() const { return gen_subunit_bases_; } + bool gen_forwarding_info() const { return gen_forwarding_info_; } + bool gen_operand_indexes() const { return gen_operand_indexes_; } + + void OpenOutputFiles(std::string &input_file, bool gen_arch_spec, + std::string &output_dir); + + // Functions that write MDL records to the output file. + void WriteMDL(std::string &input_file, bool gen_arch_spec, + std::string &output_dir); + void WriteFileHeaders(); + void WriteArchHeader(const char *section, int entries); + void WriteInstHeader(const char *section); + void WritePipelinePhase(); + void WriteProcessorModel(); + void WriteFUInfo(); + void WriteSubunits(); + void WriteLatencies(); + void WritePredicates(); + void WriteRegisterSets(); + void WriteRegisterClasses(); + void WriteOperandsMdl(); + void WriteInstructionsMdl(); + + void WriteInstructionsCsv(); + + // Functions for dumping TG records for debug + void DumpTableGenInfo(); + void DumpForwardingGraph(); + void DumpInstrForwardingInfo(); + void DumpStats(); + void DumpSchedReadWrite(bool summary); + std::string DumpSchedRW(const std::string &name, const std::string &model, + std::string prefix); + void DumpInstRW(); + void DumpItinRW(); + void DumpForwardingInfo(); + void DumpSchedVariant(); + void DumpSchedVar(); + void DumpPredicates(); + void DumpSchedAlias(); + + void DumpWriteSequence(); + void DumpProcessorItineraries(); + void DumpInstrStage(); + void DumpInstrItineraryData(); + void DumpBypasses(); + void DumpInstrItineraryClasses(); + void DumpProcessorModel(); + void DumpSchedMachineModel(); + void DumpFUInfo(); + void DumpLatInfo(); + void DumpInstructions(); + + bool FormatRef(std::string &out, const Instruction *inst, + const std::string &sched_model, const std::string &unit_name, + const std::string &operand, int repeat, int &latency, + bool &fu_seen, std::string indent = " "); + std::string FormatReferences(Instruction *instr, + const std::string &sched_model, + const InstRW *inst_rw); + + // Functions for scanning various parts of the file. 
+ void SkipRecord(std::ifstream &in); + char *GetLine(std::ifstream &in, std::string &input); + + std::string GetOperandName(const std::string &opnd); + std::string GetOperandType(const std::string &opnd); + + OperandRefs GetRegisterOperands(Instruction *instr); + void GetRegisterOperands(std::string prefix, std::string ref_type, + int opnd_id, StringVec &opnds, OperandRefs &result); + + void FindReferencedOperands(); + void FindRegisterSets(); + void PopulateResourceGroup(ProcResource *func); + void PopulateCPUsWithFUs(); + ProcResource *CreateSuperUnitNames(const std::string &unit_name); + + bool IsReadUnit(const std::string &name, const std::string &model); + void FindReadUnits(const std::string &name, const std::string &model, + ForwardUnits &units, const std::string &predicate); + void FindWriteUnits(const std::string &name, const std::string &model, + ForwardUnits &units, int latency, + const std::string &predicate); + + void ExtractForwardingInfo(InstRW *instrw); + std::vector + FlattenInstrOperands(Instruction *instr, + const std::vector &operands); + void ProcessInstRW(); + void ProcessItinRW(); + void GenerateInstRWs(Instruction *instr, + const std::vector &rw_units, + const std::string &first, const std::string &model); + void CheckSchedulingInfo(); + + bool IsIssueSlot(const std::string &name) const; + void PreprocessItinStages(); + void PreprocessInstrItineraryData(); + void ProcessItineraries(); + + // Processing register member expressions + RegSet ParseExpr(char *&input); + RegSet RegClassAdd(char *&input); + RegSet RegClassSub(char *&input); + RegSet RegClassAnd(char *&input); + RegSet RegClassShl(char *&input); + RegSet RegClassRotl(char *&input); + RegSet RegClassRotr(char *&input); + RegSet RegClassTrunc(char *&input); + RegSet RegClassSequence(char *&input); + RegSet RegClassDecimate(char *&input); + + RegSet GetMemberList(RegisterClass *reg_class); + void ScanRegisterClass(std::ifstream &in, const std::string &name); + void ScanUnknownRegisterClass(std::ifstream &in, const std::string &name); + void ScanRegister(std::ifstream &in, const std::string &name); + + std::vector *ScanInstructSubunit(char *input); + std::string ScanInstructParent(char *input); + std::string ScanInstructPattern(char *input); + std::string ScanInstructAssy(char *input); + std::string ScanType(char *input); + std::vector ScanInstructOpnds(char *input); + std::vector ScanImplicitDefsUses(char *input); + void ScanProcessorModel(std::ifstream &in); + void ScanProcResource(std::ifstream &in, const std::string &name, + const char *paren); + void ScanComboFuncData(std::ifstream &in); + void ScanSchedMachineModel(std::ifstream &in, const std::string &name); + void ScanSchedReadWrite(std::ifstream &in, const std::string &def_name, + const char *paren, bool is_write); + void AddSchedReadWrite(const std::string &name, + const std::string &sched_model, + const std::vector &func_units, + const std::vector &res_cycles, + const std::vector &start_cycles, + const std::string &latency, + const std::string µ_ops, bool is_write, + bool is_begin_group, bool is_end_group, + bool is_single_issue, bool retire_ooo); + std::string ScanMember(char *input); + std::string ScanFixedMember(char *input); + std::string ScanCode(std::ifstream &in, std::string code); + StringVec ScanList(char *input); + StringSet ScanSet(char *input); + StringVec ScanStrings(char *input); + void ScanInstrs(char *input, StringVec &instrs, StringVec ®ex); + + void ScanOperand(std::ifstream &in, const std::string &name); + void 
ScanRegisterOperand(std::ifstream &in, const std::string &name); + void ScanPatternOperator(std::ifstream &in, const std::string &name); + void ScanValueType(std::ifstream &in, const std::string &name); + void ScanPointerLikeRegClass(std::ifstream &in, const std::string &name); + void ScanInstruction(std::ifstream &in, const std::string &name, + const char *paren); + std::string ScanName(char *input); + void ScanInstRW(std::ifstream &in, const std::string &name); + void ScanItinRW(std::ifstream &in, const std::string &name); + void ScanSchedVariant(std::ifstream &in, const std::string &name, + const char *paren); + void ScanSchedVar(std::ifstream &in, const std::string &name); + void ScanMCSchedPredicate(std::ifstream &in, const std::string &name); + void ScanSchedPredicate(std::ifstream &in, const std::string &name); + void ScanMCInstPredicate(std::ifstream &in, const std::string &name, + const char *paren); + void ScanMCStatement(std::ifstream &in, const std::string &name, + const char *paren); + void ScanMCOpcodeSwitchCase(std::ifstream &in, const std::string &name); + void ScanSchedAlias(std::ifstream &in, const std::string &name); + void ScanWriteSequence(std::ifstream &in, const std::string &name); + void ScanProcessorItineraries(std::ifstream &in, const std::string &name); + void ScanInstrStage(std::ifstream &in, const std::string &name); + void ScanInstrItineraryData(std::ifstream &in, const std::string &name); + void ScanBypasses(std::ifstream &in, const std::string &name); + void ScanInstrItineraryClasses(std::ifstream &in, const std::string &name); + void AddImplicitOperands(); + + void ScanClass(std::ifstream &in, char *input); + void ScanDef(std::ifstream &in, char *input); + void ScanFile(const char *file); + + bool IsPredicateReferenced(const std::string &name); + bool OutputPredicate(const std::string &name); + void SetPredicateReferenced(const std::string &name); + + std::string PredName(const std::string &name); + std::string FormatPred(const std::string &name, int indent); + + std::map mc_inst_predicates() { + return mc_inst_predicates_; + } + std::map mc_sched_predicates() { + return mc_sched_predicates_; + } + std::map statements() { return statements_; } + std::map switch_cases() { + return switch_cases_; + } + std::map sched_vars() { return sched_vars_; } + std::map rw_units() { return rw_units_; } + std::vector &instrw_info() { return instrw_info_; } + std::vector &itinrw_info() { return itinrw_info_; } + + std::map cpus() { return cpus_; } + std::map sched_models() { + return sched_models_; + } + std::map processor_itineraries() { + return processor_itineraries_; + } + ProcessorItineraries *processor_itinerary(std::string &name) { + if (!processor_itineraries_.count(name)) + return nullptr; + return processor_itineraries_[name]; + } + std::map &proc_resources() { + return proc_resources_; + } + ForwardingNetwork &forwarding_network() { return forwarding_network_; } + + std::string FormatName(const std::string &name) const; + + bool IsInstruction(const std::string &name) const { + return instructions_.count(name); + } + bool IsOperand(const std::string &name) const { + return operands_.count(name); + } + bool IsRegister(const std::string &name) const { + return register_dict_.count(name); + } + bool IsRegisterClass(const std::string &name) const { + return register_class_list_.count(name); + } + bool IsProcessorModel(const std::string &name) const { + return cpus_.count(name); + } + bool IsSchedModel(const std::string &name) const { + return 
sched_models_.count(name); + } + bool IsProcResource(const std::string &name) const { + return proc_resources_.count(name); + } + bool IsProcResourceGroup(const std::string &name) const { + return IsProcResource(name) && + proc_resources_.at(name)->is_resource_group(); + } + bool IsComboUnit(const std::string &name) const { + return combo_units_.count(name); + } + bool IsSchedReadWrite(const std::string &name) const { + return rw_units_.count(name); + } + bool IsSchedVariant(const std::string &name) const { + return sched_variants_.count(name); + } + bool IsSchedVar(const std::string &name) const { + return sched_vars_.count(name); + } + bool IsMCSchedPredicate(const std::string &name) const { + return mc_sched_predicates_.count(name); + } + bool IsSchedPredicate(const std::string &name) const { + return sched_predicates_.count(name); + } + bool IsMCInstPredicate(const std::string &name) const { + return mc_inst_predicates_.count(name); + } + bool IsMCStatement(const std::string &name) const { + return statements_.count(name); + } + bool IsMCOpcodeSwitchCase(const std::string &name) const { + return switch_cases_.count(name); + } + bool IsWriteSequence(const std::string &name) const { + return write_sequences_.count(name); + } + bool IsSchedAliasMatch(const std::string &name) const { + return sched_alias_matches_.count(name); + } + bool IsProcessorItinerary(const std::string &name) const { + return processor_itineraries_.count(name); + } + bool IsInstrItineraryClass(const std::string &name) const { + return instr_itinerary_classes_.count(name); + } + bool IsInstrItineraryData(const std::string &name) const { + return instr_itinerary_data_.count(name); + } + bool IsInstrStage(const std::string &name) const { + return instr_stages_.count(name); + } + bool IsBypass(const std::string &name) const { return bypasses_.count(name); } + + SchedReadWrite *GetSchedReadWrite(const std::string &name, + const std::string &model) const; + std::string GetSchedRef(const std::string &name, + const std::string &model) const; + bool IsWrite(const std::string &name, const std::string &model) const; + bool IsVariadic(const std::string &name, const std::string &model) const; + + void ExpandCombos(StringVec &result, std::vector &input, + std::string item, int level); + + std::vector ResourceCombos(std::vector &stages, + int slot_stages); + + std::map, ItineraryFuncUnit *> &itinerary_fus() { + return itinerary_fus_; + } + + // Create a new functional unit template based on an itinerary. 
+ ItineraryFuncUnit *add_itinerary_fu(InstrItineraryData *itin); + + std::map &subunits() { return subunits_; } + int add_subunit(const std::string &subunit) { + if (subunits_.count(subunit)) + return subunits_[subunit]; + int size = subunits_.size(); + subunits_.insert({subunit, size}); + return size; + } + + std::map &subunit_bases() { return subunit_bases_; } + int add_subunit_base(const std::string &base) { + if (subunit_bases_.count(base)) + return subunit_bases_[base]; + int size = subunit_bases_.size(); + subunit_bases_.insert({base, size}); + return size; + } + + std::map &latencies() { return latencies_; } + int add_latency(const std::string &latency) { + if (latencies_.count(latency)) + return latencies_[latency]; + int size = latencies_.size(); + latencies_.insert({latency, size}); + return size; + } + + int max_pipeline_phase() const { return max_pipeline_phase_; } + void set_max_pipeline_phase(int phase) { + max_pipeline_phase_ = std::max(phase, max_pipeline_phase_); + } + + InstrItineraryData *instr_itinerary_data(const std::string &name) { + return instr_itinerary_data_[name]; + } + std::vector & + instr_itinerary_class(const std::string name) { + return instr_itinerary_class_[name]; + } + InstrStage *instr_stages(const std::string &name) { + return instr_stages_[name]; + } + std::string FormatItinSubunit(Instruction *inst, InstrItineraryData *itin); + + std::fstream &output_arch() const { return *output_arch_; } + std::fstream &output_inst() const { return *output_inst_; } + +private: + std::string family_; // which processor family we're looking for + std::string output_name_; // output file name stem + bool ignore_subunits_; // don't check for subunits + bool no_warnings_; // suppress warnings + bool gen_subunit_bases_; // generate instruction bases for subunits + bool gen_forwarding_info_; // generate forwarding information + bool gen_operand_indexes_; // generate indexes in references (vs names) + std::fstream *output_arch_; // generated architecture description + std::fstream *output_inst_; // generate instruction info + std::string arch_file_name_; // main output file name + std::string inst_file_name_; // instruction output file name + + //------------------------------------------------------------------------- + // Instruction information we're collecting globally from the input file: + // - Instruction definitions. + // - Operand definitions. + // - Register class definitions. + // - Register definitions. + // - Value Type definitions. + // - Pointer-like Register Class definitions. + //------------------------------------------------------------------------- + std::map instructions_; + std::map operands_; + std::map register_class_list_; + StringSet value_type_set_; + StringSet pointer_like_reg_class_; + RegDefinitions registers_; + StringSet register_dict_; + RegSet register_sets_; + int max_pipeline_phase_ = 0; + + //------------------------------------------------------------------------- + // Architectural information we're scraping from the TableGen file: + // - CPU definitions. + // - SchedModel definitions. + // - Functional unit definitions. (ProcResources) + // - InstRW definitions. + // - ItinRW definitions. + // - SchedReadWrite definitions. + // - SchedVariant definitions. + // - SchedVar definitions. + // - [MC]SchedPredicate definitions. + // - SchedAlias definitions. + // - WriteSequence definitions. + // - Forwarding network description. + // - ProcessorItineraries definitions. + // - InstrItinData definitions. + // - Itinerary Bypass definitions. 
+ // - InstrStage definitions. + //------------------------------------------------------------------------- + std::map cpus_; + std::map sched_models_; + std::map proc_resources_; + std::map> combo_units_; + + std::vector instrw_info_; + std::vector itinrw_info_; + std::map rw_units_; + std::map sched_variants_; + std::map sched_vars_; + + std::map mc_sched_predicates_; + std::map sched_predicates_; + std::map mc_inst_predicates_; + std::map statements_; + std::map switch_cases_; + + std::map sched_alias_; + std::map> + sched_alias_matches_; + + ForwardingNetwork forwarding_network_; + + std::map write_sequences_; + + std::map processor_itineraries_; + StringSet instr_itinerary_classes_; + StringSet bypasses_; + std::map instr_itinerary_data_; + std::map> + instr_itinerary_class_; + std::map instr_stages_; + + //------------------------------------------------------------------------- + // Subunits and latencies created for schedules. + //------------------------------------------------------------------------- + std::map subunit_bases_; + std::map subunits_; + std::map latencies_; + + //------------------------------------------------------------------------- + // Subunit ids associated with each sched model. + //------------------------------------------------------------------------- + std::map sched_model_subunits_; + + //------------------------------------------------------------------------- + // Functional units and subunits create for itineraries. + // These objects are indexed with a vector of stage names. + //------------------------------------------------------------------------- + std::map, ItineraryFuncUnit *> itinerary_fus_; + std::map, int> itinerary_subunits_; +}; + +} // namespace scan +} // namespace mdl +} // namespace mpact + +#endif // TDSCAN_SCAN_H_ diff --git a/llvm/utils/TdScan/scan.cpp b/llvm/utils/TdScan/scan.cpp new file mode 100644 --- /dev/null +++ b/llvm/utils/TdScan/scan.cpp @@ -0,0 +1,1816 @@ +//===- scan.cpp - Read in tablegen records and generate and MDL -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Read through an llvm table-gen-generated "reports.txt" input file, and +// compile a table of instructions and their attributes (operands, syntax, +// etc), operand definitions, and register/register class definitions. The MDL +// compiler will use these definitions to tie the architecture description +// back to llvm-defined instructions, registers, and register classes. We +// write these definitions out in the MDL language (we can optionally generate +// a CSV file that contains all the instruction information). +// +// Since the input file is machine-generated, we assume the file is (very) +// well-formed, but still do some simple sanity checks to avoid crashes. +// +// For each instruction definition, we collect: +// - its name. +// - its output operand names and types. +// - its input operand names and types. +// - its assembly formatting rule. +// - its llvm matching pattern. +// - its parent instruction (if present). +// - its subunit identifier(s). +// +// For each operand definition, we collect its set of sub-operands, and type. +// We also look for register definitions, and register class definitions. 
+//
+// Based on a command-line switch `gen_arch_spec`, we collect the information
+// related to CPU variants, the functional units supported by each variant, and
+// the subunits supported by each functional unit. When using this switch, we
+// create a complete MDL description for the architecture that captures all the
+// subtargets, functional units, instruction latencies, resource usage, and
+// parallel issue rules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "scan.h"
+
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+//-------------------------------------------------------------------------
+// Command line options.
+//-------------------------------------------------------------------------
+cl::opt<std::string> input_file(cl::Positional, cl::desc(""));
+
+cl::opt<std::string>
+    family_name("family_name", cl::init(""), cl::value_desc("family"),
+                cl::desc("Processor family name (default=)."));
+cl::opt<std::string> output_name(
+    "output_name", cl::init(""), cl::value_desc("output"),
+    cl::desc("Output file name prefix (default=)"));
+
+cl::opt<bool> ignore_subunits("ignore_subunits", cl::init(false),
+                              cl::desc("Ignore subunits."));
+cl::opt<bool>
+    gen_arch_spec("gen_arch_spec", cl::init(false),
+                  cl::desc("Auto-generate the MDL specification. (-g)"));
+cl::alias gen_arch_specA("g", cl::desc("Alias for --gen_arch_spec"),
+                         cl::aliasopt(gen_arch_spec));
+
+cl::opt<bool> generate_base_subunits(
+    "gen_base_subunits", cl::init(false),
+    cl::desc("Generate instruction bases for each subunit"));
+cl::opt<bool> generate_forwarding_info(
+    "gen_forwarding", cl::init(false),
+    cl::desc("Generate forwarding information for each CPU"));
+cl::opt<bool>
+    generate_operand_indexes("gen_indexes", cl::init(false),
+                             cl::desc("Generate operand indexes (vs names)"));
+
+cl::opt<std::string> output_dir("output_dir", cl::init(""),
+                                cl::value_desc("dir"),
+                                cl::desc("Output path for generated files."));
+cl::opt<bool> no_warnings("nowarnings", cl::init(false),
+                          cl::desc("Suppress warnings. (-q)"));
+cl::alias quiet_mode("q", cl::desc("Alias for --nowarnings"),
+                     cl::aliasopt(no_warnings));
+
+cl::OptionCategory TdDebug("Debugging Options");
+cl::opt<bool> generate_csv("dump_csv", cl::init(false), cl::cat(TdDebug),
+                           cl::desc("Dump instructions to a CSV file."));
+cl::opt<bool> dump_debug("dump_debug", cl::init(false), cl::cat(TdDebug),
+                         cl::desc("Dump lots of debug info."));
+cl::opt<bool> dump_instr_forwarding("dump_instr_forwarding", cl::init(false),
+                                    cl::cat(TdDebug),
+                                    cl::desc("Dump instruction forwarding info."));
+cl::opt<bool> dump_forwarding("dump_forwarding", cl::init(false),
+                              cl::cat(TdDebug),
+                              cl::desc("Dump forwarding info."));
+cl::opt<bool> dump_rw("dump_rw", cl::init(false), cl::cat(TdDebug),
+                      cl::desc("Dump ReadWrite Records"));
+cl::alias dump_debugA("d", cl::desc("Alias for --dump_debug"),
+                      cl::aliasopt(dump_debug));
+
+// Check command line arguments and manage help option messages.
+static void usage(int argc, char **argv) {
+  if (argc < 2) {
+    std::cerr << "usage: scan \n"
+                 "  --help: print program options\n";
+    exit(EXIT_FAILURE);
+  }
+
+  cl::getRegisteredOptions()["version"]->setHiddenFlag(cl::Hidden);
+  cl::getRegisteredOptions()["color"]->setHiddenFlag(cl::ReallyHidden);
+  cl::ParseCommandLineOptions(argc, argv, "Tablegen Scan Utility");
+
+  if (input_file.empty()) {
+    std::cerr << "Error: no input file\n";
+    exit(EXIT_FAILURE);
+  }
+}
+
+//-------------------------------------------------------------------------
+// Scan a Tablegen-generated records file, find definitions we're
+// interested in, and add them to the machine description.
+//-------------------------------------------------------------------------
+int main(int argc, char **argv) {
+  // Process command-line options.
+  usage(argc, argv);
+
+  // If a family name isn't specified, extract it from the input file name.
+  if (family_name.empty())
+    family_name = std::filesystem::path(input_file.c_str()).stem();
+  if (family_name.empty()) {
+    std::cerr << "Please specify a family name\n";
+    exit(EXIT_FAILURE);
+  }
+
+  if (gen_arch_spec)
+    ignore_subunits = true;
+  mpact::mdl::scan::MachineDescription md_info(
+      family_name, output_name, ignore_subunits, no_warnings,
+      generate_base_subunits, generate_forwarding_info,
+      generate_operand_indexes);
+
+  // Go read the file and collect instruction info.
+  md_info.ScanFile(input_file.c_str());
+
+  // Augment an instruction's implicit input & output operands with those of
+  // its base instruction.
+  md_info.AddImplicitOperands();
+
+  // Note in each operand definition whether it has been referenced or not.
+  md_info.FindReferencedOperands();
+
+  if (generate_csv) {
+    md_info.WriteInstructionsCsv();
+    exit(EXIT_SUCCESS);
+  }
+
+  // Generate an architecture description from tablegen info, if requested.
+  if (gen_arch_spec) {
+    md_info.PopulateCPUsWithFUs();
+    md_info.ProcessItinRW();
+    md_info.ProcessInstRW();
+    md_info.ProcessItineraries();
+    md_info.CheckSchedulingInfo();
+    if (dump_debug)
+      md_info.DumpTableGenInfo();
+    if (dump_forwarding)
+      md_info.DumpForwardingGraph();
+    if (dump_instr_forwarding)
+      md_info.DumpInstrForwardingInfo();
+  }
+
+  // Debug - write out each SchedReadWrite.
+  if (dump_rw)
+    md_info.DumpSchedReadWrite(false);
+
+  // Write out the MDL file.
+  md_info.WriteMDL(input_file, gen_arch_spec, output_dir);
+  exit(EXIT_SUCCESS);
+}
+
+namespace mpact {
+namespace mdl {
+namespace scan {
+
+//-------------------------------------------------------------------------
+// Read in an input line, skip over leading spaces.
+//-------------------------------------------------------------------------
+char *MachineDescription::GetLine(std::ifstream &in, std::string &input) {
+  std::getline(in, input);
+  if (input == "}")
+    return nullptr;
+
+  std::size_t found = input.find_first_not_of(" \t");
+  if (found != std::string::npos)
+    return &input[found];
+  return &input[0];
+}
+
+//-------------------------------------------------------------------------
+// Skip over the unused part of a Def record.
+//-------------------------------------------------------------------------
+void MachineDescription::SkipRecord(std::ifstream &in) {
+  std::string input;
+  while (in && GetLine(in, input) != nullptr) {
+  }
+}
+
+//-------------------------------------------------------------------------
+// Scan a register class definition, save off class.
+// Note that we cannot populate the class until we're read in all the +// classes, since classes can be composed of subclasses which we haven't +// seen yet. +//------------------------------------------------------------------------- +void MachineDescription::ScanRegisterClass(std::ifstream &in, + const std::string &name) { + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto MemberList = "dag MemberList = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, MemberList, strlen(MemberList))) { + register_class_list_.emplace( + name, new RegisterClass(name, lstart + strlen(MemberList))); + SkipRecord(in); + return; + } + } +} + +//------------------------------------------------------------------------- +// Scan an "unknown" register class, and create an empty register class. +//------------------------------------------------------------------------- +void MachineDescription::ScanUnknownRegisterClass(std::ifstream &in, + const std::string &name) { + register_class_list_.emplace(name, new RegisterClass(name, "")); + SkipRecord(in); +} + +//------------------------------------------------------------------------- +// Scan a SDPatternOperator operator. +//------------------------------------------------------------------------- +void MachineDescription::ScanPatternOperator(std::ifstream &in, + const std::string &name) { + std::vector ops; + operands_[name] = new Operand(name, ops, "i32"); + SkipRecord(in); +} + +//------------------------------------------------------------------------- +// Scan a ValueType record. +// We're really just interested in saving these names, since they are +// used to define operands. +//------------------------------------------------------------------------- +void MachineDescription::ScanValueType(std::ifstream &in, + const std::string &name) { + value_type_set_.insert(name); + SkipRecord(in); +} + +//------------------------------------------------------------------------- +// Scan a PointerLikeRegClass record. +// We're really just interested in saving these names, since they are +// used to define operands. +//------------------------------------------------------------------------- +void MachineDescription::ScanPointerLikeRegClass(std::ifstream &in, + const std::string &name) { + pointer_like_reg_class_.insert(name); + SkipRecord(in); +} +//------------------------------------------------------------------------- +// Scan a register definition, save off register. +//------------------------------------------------------------------------- +void MachineDescription::ScanRegister(std::ifstream &in, + const std::string &name) { + std::string name_space; // Namespace attribute. + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto namespace__str = "string Namespace = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, namespace__str, strlen(namespace__str))) + name_space = ScanName(lstart + strlen(namespace__str)); + } + + // If definition wasn't part of the current family, ignore. + if (name_space != family()) + return; + + registers_.insert(RegDefinition(name)); + register_dict_.insert(name); +} + +//------------------------------------------------------------------------- +// Read in a subunit record. 
+// Lines are of the form: list = ["name" (',' "name")* ]; +// Input points just past the first bracket. +// Return the vector of subunit names. +//------------------------------------------------------------------------- +std::vector *MachineDescription::ScanInstructSubunit(char *input) { + auto *result = new std::vector; + + while (char *name = strtok_r(input, " ,];", &input)) + result->push_back(name + 3); // Skip over the prefix. + + if (result->empty()) { + delete result; + return nullptr; + } + return result; +} + +//------------------------------------------------------------------------- +// Read in the entire contents of a record's field interpreted as a list. +//------------------------------------------------------------------------- +StringVec MachineDescription::ScanList(char *input) { + std::vector result; + + while (char *name = strtok_r(input, " ,]);", &input)) + result.push_back(name); + return result; +} + +//------------------------------------------------------------------------- +// Read in the entire contents of a record's field interpreted as a set. +//------------------------------------------------------------------------- +StringSet MachineDescription::ScanSet(char *input) { + StringSet result; + + while (char *name = strtok_r(input, " ,]);", &input)) + result.insert(name); + return result; +} + +//------------------------------------------------------------------------- +// Read in a list of quoted strings. +//------------------------------------------------------------------------- +StringVec MachineDescription::ScanStrings(char *input) { + StringVec result; + + while (*input == '"') { + char *name = input + 1; + input = strchr(name, '"'); + *input++ = 0; + result.push_back(name); + if (*input == ',') + input++; + while (isspace(*input)) + input++; + } + return result; +} + +//------------------------------------------------------------------------- +// Read in the entire contents of a record's field as is. +//------------------------------------------------------------------------- +std::string MachineDescription::ScanMember(char *input) { + char *end = strchr(input, ';'); // Get rid of any ';'. + if (end) + *end = 0; + if (strcmp(input, "\"\"") == 0) + return ""; + + std::string name = input; + return name; +} + +//------------------------------------------------------------------------- +// Read in a field that might contain a "empty" string, in which case we +// just return the empty string. +//------------------------------------------------------------------------- +std::string MachineDescription::ScanFixedMember(char *input) { + return FixAttribute(ScanMember(input)); +} + +//------------------------------------------------------------------------- +// Read in the entire contents of a record's field interpreted as code. +// We handle three representations of code: +// - code in quotes: "a + b + c" +// - code delineated by [{ ... }] +// - multi-line code delineated by [{ ... }] +//------------------------------------------------------------------------- +std::string MachineDescription::ScanCode(std::ifstream &in, std::string code) { + char *lstart; + std::string input; + + // Handle quoted code. + if (code[0] == '"') { + return code.substr(1, code.find("\";") - 1); + } + + // Handle single-line code fragments. + auto it = code.find("}]"); + if (it != std::string::npos) + return code.substr(2, it - 2); + + // Handle multi-line code fragments. 
+ code = code.substr(2); + while (in && (lstart = GetLine(in, input))) { + auto first_non_ws = input.find_first_not_of(" \t"); + if (first_non_ws != std::string::npos) + input = " " + input.substr(first_non_ws); + + auto it = input.find("}]"); + + if (it != std::string::npos) { + code += input.substr(0, it); + return code; + } else { + code += input; + } + } + // If we never found the terminator, abort. + assert(0 && "Something went wrong while parsing predicate code!"); + return code; +} + +//------------------------------------------------------------------------- +// Read in an instruction parent record. +// Lines are of the form: Instruction Unbundled = ... +// Input points to the beginning of the parent name. +//------------------------------------------------------------------------- +std::string MachineDescription::ScanInstructParent(char *input) { + char *end = strchr(input, ';'); // Get rid of any ';'. + if (end) + *end = 0; + + std::string name = input; + return name; +} + +//------------------------------------------------------------------------- +// Read in a pattern record. +// Lines are of the form: list Pattern = [(...)]; +//------------------------------------------------------------------------- +std::string MachineDescription::ScanInstructPattern(char *input) { + char *start = strchr(input, '('); + if (start == nullptr) + return {}; + + char *end = strrchr(start + 1, ']'); // Find last ]. + if (end == nullptr) + return {}; + + *end = 0; + return start; +} + +//------------------------------------------------------------------------- +// Read in a string field. +// The input is of the form: "name"; +// Returns the string with quotes stripped off. +//------------------------------------------------------------------------- +std::string MachineDescription::ScanName(char *input) { + char *end = strchr(input + 1, '"'); // Get rid of any trailing quote. + if (end) + *end = 0; + return input + 1; // Don't include the first quote. +} + +//------------------------------------------------------------------------- +// Read in assembly syntax records. +// Lines are of the form: string AsmString = "..." +//------------------------------------------------------------------------- +std::string MachineDescription::ScanInstructAssy(char *input) { + // Find beginning/end of the assembly string, terminate (include quotes). + char *start = strchr(input, '"'); + if (start == nullptr) + return {}; + char *end = strrchr(start + 1, '"'); // Find last ". + if (end == nullptr) + return {}; + end[1] = 0; // terminate the string + + // Convert all the tabs to spaces (for export to csv files). + for (char *tab = start; *tab; tab++) + if (*tab == '\t') + *tab = ' '; + + return start; // Return the string with the quotes. +} + +//------------------------------------------------------------------------- +// Return the type of an operand. +//------------------------------------------------------------------------- +std::string MachineDescription::ScanType(char *input) { + char *end = strchr(input, ';'); + if (end) + *end = 0; + return input; +} + +//------------------------------------------------------------------------- +// Scan ins or outs operand records. +// Lines are of the form: dag InOperandList = (ins opnd, opnd, opnd, ...); +// dag OutOperandList = (outs opnd, opnd, opnd, ...); +// dag MIOperandInfo = (ops opnd, opnd, opnd, ...); +// "input" points to right after the "ins" / "outs" / "ops". 
+//------------------------------------------------------------------------- +std::vector MachineDescription::ScanInstructOpnds(char *input) { + std::vector opnds; + + // Grab comma-separated strings and append to the opnds vector. + // There are three types of operand strings: + // - regular: ':$' + // - "complex": '(' ('?:$' )+ ')' ':$" + // - ellipsis operands: "variable_ops" + while (char *opnd = strtok_r(input, " ,);", &input)) { + // Look for ellipsis operands. + if (!strcmp(opnd, "variable_ops")) { + opnds.push_back("..."); + continue; + } + // Parse complex operands. We don't need the operand names. + // (If we did need the names, we could create a specialized operand using + // the names provided, but this isn't necessary). + if (opnd[0] == '(') { + auto complex = std::string(opnd + 1); // grab the operand type + opnd = strtok_r(input, ")", &input); // skip rest of complex opnd + opnd = strtok_r(input, ":$ ,);", &input); // grab the operand name + complex += " " + std::string(opnd); // (yes, two spaces needed...) + opnds.push_back(complex); + continue; + } + // Parse regular operands. Just replace the : and $ with two spaces. + if (char *colon = strchr(opnd, ':')) + *colon = ' '; + if (char *dollar = strchr(opnd, '$')) + *dollar = ' '; + opnds.push_back(opnd); + } + return opnds; +} + +//------------------------------------------------------------------------- +// Read the entries associated with a register operand definition, looking +// for the only field "RegisterClass RegClass" we're interested in. +//------------------------------------------------------------------------- +void MachineDescription::ScanRegisterOperand(std::ifstream &in, + const std::string &name) { + std::vector ops; // Operand records we find. + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto regClassString = "RegisterClass RegClass = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, regClassString, strlen(regClassString))) { + char *input = lstart + strlen(regClassString); + char *regClass = strtok_r(input, ";", &input); + ops.push_back(std::string(regClass) + " reg"); + } + } + + operands_[name] = new Operand(name, ops, "i32"); +} + +//------------------------------------------------------------------------- +// Read the entries associated with an operand definition, looking for +// fields we're interested in: +// - the "ops" +// - the "type" +//------------------------------------------------------------------------- +void MachineDescription::ScanOperand(std::ifstream &in, + const std::string &name) { + std::vector ops; // Operand records we find. + std::string type; // Type of operand (ie i32). + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto typeString = "ValueType Type = "; + constexpr auto opsString = "dag MIOperandInfo = (ops"; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, typeString, strlen(typeString))) + type = ScanType(lstart + strlen(typeString)); + else if (!strncmp(lstart, opsString, strlen(opsString))) + ops = ScanInstructOpnds(lstart + strlen(opsString)); + } + + //--------------------------------------------------------------------- + // Inexplicably, SystemZ has a pc-relative operand called pcrel32 that + // has a single suboperand named pcrel32. 
Recursively-defined operands + // are considered (by this tool) to be nonsensical, so we look for this + // case and delete the suboperand. + //--------------------------------------------------------------------- + if (ops.size() == 1 && ops[0] == name) + ops.clear(); + + // Create an operand description object. + operands_[name] = new Operand(name, ops, type); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a CPU definition, looking for +// fields we're interested in: +// - the "Name" +// - the "SchedModel" +// - the "processor itineraries +//------------------------------------------------------------------------- +void MachineDescription::ScanProcessorModel(std::ifstream &in) { + std::string name; + std::string sched_model; + std::string input; + std::string itineraries; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto name_str = "string Name = "; + constexpr auto sched_model_str = "SchedMachineModel SchedModel = "; + constexpr auto itineraries_str = "ProcessorItineraries ProcItin = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, name_str, strlen(name_str))) + name = ScanName(lstart + strlen(name_str)); + else if (!strncmp(lstart, sched_model_str, strlen(sched_model_str))) + sched_model = ScanFixedMember(lstart + strlen(sched_model_str)); + else if (!strncmp(lstart, itineraries_str, strlen(itineraries_str))) + itineraries = ScanMember(lstart + strlen(itineraries_str)); + } + + if (sched_model.empty()) + std::cerr << "No schedule model for CPU " << name << "\n"; + + // Promote processor itineraries to a new SchedModel that uses the + // itineraries, and use that SchedModel to initialize the ProcessorModel. + if (!itineraries.empty() && itineraries != "NoItineraries") { + if (sched_model != "NoSchedModel") + std::cerr << formatv("Warning: {0} has a SchedModel and Itineraries\n", + sched_model); + if (sched_models_.count(itineraries) == 0) + sched_models_[itineraries] = + new SchedMachineModel(itineraries, itineraries, family()); + sched_model = itineraries; + } + + cpus_[name] = new ProcessorModel(name, sched_model); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a ProcResource definition, looking for +// fields we're interested in: +// - the number of instances for this unit (defaults to 1). +// - the size of the input buffer. +// - which SchedModel this unit is associated with. +// - its ProcResource group (optionally). +// - Its parent functional unit (optionally) +//------------------------------------------------------------------------- +void MachineDescription::ScanProcResource(std::ifstream &in, + const std::string &name, + const char *paren) { + // Fields scraped from the definition. + std::string count = "1"; + std::string buffer_size = "-1"; + std::string sched_model; + std::vector func_units; + std::string super(""); + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto num_unit_str = "int NumUnits = "; + constexpr auto buffer_size_str = "int BufferSize = "; + constexpr auto sched_model_str = "SchedMachineModel SchedModel = "; + constexpr auto func_units_str = "list Resources = ["; + constexpr auto super_str = "ProcResourceKind Super = "; + + // Read subfield definitions and process the ones we're interested in. 
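+  // Illustrative subfield lines this loop recognizes (the values shown are
+  // hypothetical):
+  //   int NumUnits = 2;
+  //   int BufferSize = 0;
+  //   SchedMachineModel SchedModel = SomeSchedModel;
+  //   ProcResourceKind Super = SomeParentUnit;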
+ while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, num_unit_str, strlen(num_unit_str))) + count = ScanMember(lstart + strlen(num_unit_str)); + if (!strncmp(lstart, buffer_size_str, strlen(buffer_size_str))) + buffer_size = ScanMember(lstart + strlen(buffer_size_str)); + else if (!strncmp(lstart, sched_model_str, strlen(sched_model_str))) + sched_model = ScanFixedMember(lstart + strlen(sched_model_str)); + else if (!strncmp(lstart, func_units_str, strlen(func_units_str))) + func_units = ScanList(lstart + strlen(func_units_str)); + else if (!strncmp(lstart, super_str, strlen(super_str))) + super = ScanFixedMember(lstart + strlen(super_str)); + } + + proc_resources_[name] = + new ProcResource(name, count, buffer_size, sched_model, func_units, super, + strstr(paren, " ProcResGroup")); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a ComboFuncData resource. This maps a +// name to several functional units that are used together. +//------------------------------------------------------------------------- +void MachineDescription::ScanComboFuncData(std::ifstream &in) { + std::string name; + std::vector func_units; + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto the_combo_func_str = "FuncUnit TheComboFunc = "; + constexpr auto func_list_str = "list FuncList = ["; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, the_combo_func_str, strlen(the_combo_func_str))) + name = ScanMember(lstart + strlen(the_combo_func_str)); + if (!strncmp(lstart, func_list_str, strlen(func_list_str))) + func_units = ScanList(lstart + strlen(func_list_str)); + } + + // Add an entry to the combo_units table. + combo_units_.insert({name, func_units}); +} + +//------------------------------------------------------------------------- +// Scan SchedReadWrite definitions. +// Fields we're interested in: (for SchedWrites) +// - the "WriteType" +// - the "SchedModel" +// - the "ProcResources" +// - the "Latency" +// - the "NumMicroOps" +// - the "ResourceCycles" (or "ReleaseAtCycles") +// - the "StartAtCycle" (or "AcquireAtCycles") +// - flags: BeginGroup, EndGroup, SingleIssue, RetireOOO +// Fields we're interest in: (for SchedReads) +// - the "WriteType" +// - the "SchedModel" +// - the "ValidWrites" +// - the "Latency" +//------------------------------------------------------------------------- +void MachineDescription::ScanSchedReadWrite(std::ifstream &in, + const std::string &def_name, + const char *paren, bool is_write) { + std::string name; + std::string model; + std::string latency = "0"; + std::string micro_ops = "0"; + std::vector func_units; + std::vector res_cycles; + std::vector start_cycles; + std::string input; + bool is_begin_group = false; + bool is_end_group = false; + bool is_single_issue = false; + bool retire_ooo = false; + + char *lstart; + + // These strings correspond to subfields we are interested in. 
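+  // Illustrative subfield lines from a WriteRes-style record (the values
+  // shown are hypothetical):
+  //   list ProcResources = [SomeALU];
+  //   list ResourceCycles = [2];
+  //   int Latency = 3;
+  //   int NumMicroOps = 1;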
+ constexpr auto schedwrite_str = "SchedWrite WriteType = "; + constexpr auto schedread_str = "SchedRead ReadType = "; + constexpr auto sched_model_str = "SchedMachineModel SchedModel = "; + constexpr auto func_units_str = "list ProcResources = ["; + constexpr auto res_cycles_str = "list ResourceCycles = ["; + constexpr auto release_cycles_str = "list ReleaseAtCycles = ["; + constexpr auto start_cycle_str = "list StartAtCycle = ["; + constexpr auto acq_cycle_str = "list AcquireAtCycle = ["; + constexpr auto valid_writes_str = "list ValidWrites = ["; + constexpr auto latency_str = "int Latency = "; + constexpr auto micro_ops_str = "int NumMicroOps = "; + constexpr auto cycles_str = "int Cycles = "; + constexpr auto begin_str = "bit BeginGroup = "; + constexpr auto end_str = "bit EndGroup = "; + constexpr auto single_str = "bit SingleIssue = "; + constexpr auto retire_str = "bit RetireOOO = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, schedwrite_str, strlen(schedwrite_str))) + name = ScanMember(lstart + strlen(schedwrite_str)); + if (!strncmp(lstart, schedread_str, strlen(schedread_str))) + name = ScanMember(lstart + strlen(schedread_str)); + else if (!strncmp(lstart, sched_model_str, strlen(sched_model_str))) + model = ScanFixedMember(lstart + strlen(sched_model_str)); + else if (!strncmp(lstart, func_units_str, strlen(func_units_str))) + func_units = ScanList(lstart + strlen(func_units_str)); + else if (!strncmp(lstart, res_cycles_str, strlen(res_cycles_str))) + res_cycles = ScanList(lstart + strlen(res_cycles_str)); + else if (!strncmp(lstart, release_cycles_str, strlen(release_cycles_str))) + res_cycles = ScanList(lstart + strlen(release_cycles_str)); + else if (!strncmp(lstart, start_cycle_str, strlen(start_cycle_str))) + start_cycles = ScanList(lstart + strlen(start_cycle_str)); + else if (!strncmp(lstart, acq_cycle_str, strlen(acq_cycle_str))) + start_cycles = ScanList(lstart + strlen(acq_cycle_str)); + else if (!strncmp(lstart, valid_writes_str, strlen(valid_writes_str))) + func_units = ScanList(lstart + strlen(valid_writes_str)); + else if (!strncmp(lstart, latency_str, strlen(latency_str))) + latency = ScanMember(lstart + strlen(latency_str)); + else if (!strncmp(lstart, micro_ops_str, strlen(micro_ops_str))) + micro_ops = ScanMember(lstart + strlen(micro_ops_str)); + else if (!strncmp(lstart, cycles_str, strlen(cycles_str))) + latency = ScanMember(lstart + strlen(cycles_str)); + else if (!strncmp(lstart, begin_str, strlen(begin_str))) + is_begin_group = ScanMember(lstart + strlen(begin_str)) == "1"; + else if (!strncmp(lstart, end_str, strlen(end_str))) + is_end_group = ScanMember(lstart + strlen(end_str)) == "1"; + else if (!strncmp(lstart, single_str, strlen(single_str))) + is_single_issue = ScanMember(lstart + strlen(single_str)) == "1"; + else if (!strncmp(lstart, retire_str, strlen(retire_str))) + retire_ooo = ScanMember(lstart + strlen(retire_str)) == "1"; + } + + // If this is a useless write resource, ignore it. + if (is_write && (model.empty() || model == "?") && + (latency.empty() || latency == "0") && func_units.empty()) + return; + + // Add it to the ReadWrite table. 
+ if (!strstr(paren, " WriteRes") && !strstr(paren, " ReadAdvance")) + name = def_name; + AddSchedReadWrite(name, model, func_units, res_cycles, start_cycles, latency, + micro_ops, is_write, is_begin_group, is_end_group, + is_single_issue, retire_ooo); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a machine model definition, looking for +// fields we're interested in: +// - the "IssueWidth" +// - the "LoadLatency" +// - the "HighLatency" +// - the micro-op reorder buffer size +// - the mispredicted branch penalty +// - Itineraries used in this model +//------------------------------------------------------------------------- +void MachineDescription::ScanSchedMachineModel(std::ifstream &in, + const std::string &name) { + std::string issue_width = "0"; + std::string load_latency = "0"; + std::string high_latency = "0"; + int mispredict = 0; + int micro_op_size = 0; + std::string itineraries; + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto issue_width_str = "int IssueWidth = "; + constexpr auto load_latency_str = "int LoadLatency = "; + constexpr auto high_latency_str = "int HighLatency = "; + constexpr auto itineraries_str = "ProcessorItineraries Itineraries = "; + constexpr auto micro_op_str = "int MicroOpBufferSize = "; + constexpr auto mispredict_str = "int MispredictPenalty = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, issue_width_str, strlen(issue_width_str))) + issue_width = ScanMember(lstart + strlen(issue_width_str)); + else if (!strncmp(lstart, load_latency_str, strlen(load_latency_str))) + load_latency = ScanMember(lstart + strlen(load_latency_str)); + else if (!strncmp(lstart, high_latency_str, strlen(high_latency_str))) + high_latency = ScanMember(lstart + strlen(high_latency_str)); + else if (!strncmp(lstart, micro_op_str, strlen(micro_op_str))) + micro_op_size = std::stoi(ScanMember(lstart + strlen(micro_op_str))); + else if (!strncmp(lstart, mispredict_str, strlen(mispredict_str))) + mispredict = std::stoi(ScanMember(lstart + strlen(mispredict_str))); + else if (!strncmp(lstart, itineraries_str, strlen(itineraries_str))) + itineraries = ScanMember(lstart + strlen(itineraries_str)); + } + + sched_models_[name] = + new SchedMachineModel(name, issue_width, load_latency, high_latency, + micro_op_size, mispredict, itineraries, family()); +} + +//------------------------------------------------------------------------- +// Process InstRW Instrs parameters records. These lines are of the form: +// "dag Instrs = (instrs );", and we need to parse the +// parameters here. Each comma-separated parameter is either a plain +// identifier which represents an instruction name, or a regular expression +// list of the form: (instregex +). +// Add the instruction names to the "instrs" vector, and add the regular +// expressions to the "regex" vector. +//------------------------------------------------------------------------- +void MachineDescription::ScanInstrs(char *input, StringVec &instrs, + StringVec ®ex) { + + for (char end = ','; end == ','; end = *input) { + // Skip over delimiters + input += strspn(input, " ,)"); + // Handle a list of regular expressions. 
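+    // Illustrative sub-dag (the patterns are hypothetical):
+    //   (instregex "^MUL", "^DIV")
+    // Each quoted pattern is appended to "regex"; plain opcode names outside
+    // such a sub-dag are handled below and appended to "instrs".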
+ if (strncmp(input, "(instregex", strlen("(instregex")) == 0) { + for (char qend = ','; qend == ','; qend = *input) { + char *quote = strchr(input, '"'); + char *endquote = strchr(quote + 1, '"'); + regex.push_back(std::string(quote + 1, endquote - quote - 1)); + input = endquote + 1; + } + input++; // skip trailing ')' + } else { + // Handle a name. + char *sep = strpbrk(input, ",)"); + instrs.push_back(std::string(input, sep - input)); + input = sep; + } + } +} + +//------------------------------------------------------------------------- +// Read the entries associated with a InstRW definition, looking for +// fields we're interested in: +// - the "OperandReadWrites" +// - the "Instrs" +// - the "(instrregex "");" +// - the "instrs Opcode1, Opcode2)" +// - the "SchedModel" +//------------------------------------------------------------------------- +void MachineDescription::ScanInstRW(std::ifstream &in, + const std::string &name) { + std::vector rw_units; + std::string sched_model; + StringVec instregex; + StringVec instrs; + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto rw_units_str = "list OperandReadWrites = ["; + constexpr auto sched_model_str = "SchedMachineModel SchedModel = "; + constexpr auto instregex_str = "dag Instrs = (instregex "; + constexpr auto instrs_str = "dag Instrs = (instrs "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, rw_units_str, strlen(rw_units_str))) + rw_units = ScanList(lstart + strlen(rw_units_str)); + else if (!strncmp(lstart, sched_model_str, strlen(sched_model_str))) + sched_model = ScanFixedMember(lstart + strlen(sched_model_str)); + else if (!strncmp(lstart, instregex_str, strlen(instregex_str))) + instregex = ScanStrings(lstart + strlen(instregex_str)); + else if (!strncmp(lstart, instrs_str, strlen(instrs_str))) + ScanInstrs(lstart + strlen(instrs_str), instrs, instregex); + } + + instrw_info_.push_back( + new InstRW(name, rw_units, sched_model, instregex, instrs)); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a ItinRW definition, looking for +// fields we're interested in: +// - the "OperandReadWrites" +// - the "Matched Itinerary Classes" +// - the "SchedModel" +//------------------------------------------------------------------------- +void MachineDescription::ScanItinRW(std::ifstream &in, + const std::string &name) { + std::vector rw_units; + std::vector itin_names; + std::string sched_model; + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto rw_units_str = "list OperandReadWrites = ["; + constexpr auto sched_model_str = "SchedMachineModel SchedModel = "; + constexpr auto itins_str = "list MatchedItinClasses = ["; + + // Read subfield definitions and process the ones we're interested in. 
+ while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, rw_units_str, strlen(rw_units_str))) + rw_units = ScanList(lstart + strlen(rw_units_str)); + else if (!strncmp(lstart, sched_model_str, strlen(sched_model_str))) + sched_model = ScanFixedMember(lstart + strlen(sched_model_str)); + else if (!strncmp(lstart, itins_str, strlen(itins_str))) + itin_names = ScanList(lstart + strlen(itins_str)); + } + + itinrw_info_.push_back(new ItinRW(name, itin_names, rw_units, sched_model)); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a SchedVariant definition, looking for +// fields we're interested in: +// - the "variants" +// - Variants composed of a predicate function and a list of rw-units. +// - the "SchedModel" +// - the "Variadic" attribute +//------------------------------------------------------------------------- +void MachineDescription::ScanSchedVariant(std::ifstream &in, + const std::string &name, + const char *paren) { + std::vector variants; + std::string sched_model; + std::string input; + bool is_variadic = false; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto sched_model_str = "SchedMachineModel SchedModel = "; + constexpr auto variants_str = "list Variants = ["; + constexpr auto is_variadic_str = "bit Variadic = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, sched_model_str, strlen(sched_model_str))) + sched_model = ScanFixedMember(lstart + strlen(sched_model_str)); + else if (!strncmp(lstart, variants_str, strlen(variants_str))) + variants = ScanList(lstart + strlen(variants_str)); + else if (!strncmp(lstart, is_variadic_str, strlen(is_variadic_str))) + is_variadic = ScanMember(lstart + strlen(is_variadic_str)) != "0"; + } + + assert(!sched_variants_.count(name) && "Duplicate SchedVariant Record!"); + sched_variants_[name] = + new SchedVariant(name, sched_model, variants, + strstr(paren, " SchedWriteVariant"), is_variadic); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a SchedVar definition, looking for +// fields we're interested in: +// - the "predicate" +// - the "Selected" +//------------------------------------------------------------------------- +void MachineDescription::ScanSchedVar(std::ifstream &in, + const std::string &name) { + std::vector selected; + std::string predicate(""); + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto predicate_str = "SchedPredicateBase Predicate = "; + constexpr auto selected_str = "list Selected = ["; + + // Read subfield definitions and process the ones we're interested in. 
+ while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, predicate_str, strlen(predicate_str))) + predicate = ScanMember(lstart + strlen(predicate_str)); + else if (!strncmp(lstart, selected_str, strlen(selected_str))) + selected = ScanList(lstart + strlen(selected_str)); + } + + assert(!sched_vars_.count(name) && "Duplicate SchedVar Record!"); + sched_vars_[name] = new SchedVar(name, predicate, selected); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a MCSchedPredicate definition, looking for +// fields we're interested in: +// - the "Pred" +// - the "SchedModel" +//------------------------------------------------------------------------- +void MachineDescription::ScanMCSchedPredicate(std::ifstream &in, + const std::string &name) { + std::string mcinst_predicate; + std::string sched_model; + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto mcinst_predicate_str = "MCInstPredicate Pred = "; + constexpr auto sched_model_str = "SchedMachineModel SchedModel = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, mcinst_predicate_str, strlen(mcinst_predicate_str))) + mcinst_predicate = ScanMember(lstart + strlen(mcinst_predicate_str)); + else if (!strncmp(lstart, sched_model_str, strlen(sched_model_str))) + sched_model = ScanFixedMember(lstart + strlen(sched_model_str)); + } + + assert(!IsMCSchedPredicate(name) && "Duplicate MCSchedPredicate Record!"); + mc_sched_predicates_[name] = + new MCSchedPredicate(name, sched_model, mcinst_predicate); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a SchedVar definition, looking for +// fields we're interested in: +// - the "FunctionName" +// - the "SchedModel" +//------------------------------------------------------------------------- +void MachineDescription::ScanMCInstPredicate(std::ifstream &in, + const std::string &name, + const char *paren) { + std::string function_name; + std::string function_mapper; + std::string opindex; + std::string immval; + std::vector predicates; + std::vector valid_opcodes; + std::string register_name; + std::vector attributes; + std::string statement; + + std::string input; + char *lstart; + + std::string pred_attributes[] = {kCheckAny, + kCheckAll, + kCheckNot, + kCheckOpcode, + kCheckIsRegOperand, + kCheckRegOperand, + kCheckSameRegOperand, + kCheckInvalidRegOperand, + kCheckIsImmOperand, + kCheckImmOperand, + kCheckZeroOperand, + kCheckFunctionPredicateWithTII, + kCheckFunctionPredicate, + kCheckNumOperands}; + + // These strings correspond to subfields we are interested in. 
+ constexpr auto function_name_str = "string FunctionName = "; + constexpr auto instr_fn_name_str = "string MachineInstrFnName = "; + constexpr auto mc_ins_fn_str = "string MCInstFnName = "; + constexpr auto tii_ptr_name_str = "string TIIPtrName = "; + constexpr auto function_mapper_str = "string FunctionMapper = "; + constexpr auto opindex_str = "int OpIndex = "; + constexpr auto immval_str = "int ImmVal = "; + constexpr auto immval_str_s = "string ImmVal = "; + constexpr auto predicates_str = "list Predicates = ["; + constexpr auto predicate_str = "MCInstPredicate Pred = "; + constexpr auto valid_opcodes_str = "list ValidOpcodes = ["; + constexpr auto register_str = "Register Reg = "; + constexpr auto statement_str = "MCStatement Body = "; + constexpr auto first_index_str = "int FirstIndex = "; + constexpr auto second_index_str = "int SecondIndex = "; + constexpr auto numops_str = "int NumOps = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, function_name_str, strlen(function_name_str))) + function_name = ScanName(lstart + strlen(function_name_str)); + if (!strncmp(lstart, instr_fn_name_str, strlen(instr_fn_name_str))) + function_mapper = ScanMember(lstart + strlen(instr_fn_name_str)); + else if (!strncmp(lstart, mc_ins_fn_str, strlen(mc_ins_fn_str))) + function_name = ScanMember(lstart + strlen(mc_ins_fn_str)); + else if (!strncmp(lstart, tii_ptr_name_str, strlen(tii_ptr_name_str))) + immval = ScanMember(lstart + strlen(tii_ptr_name_str)); + else if (!strncmp(lstart, function_mapper_str, strlen(function_mapper_str))) + function_mapper = ScanMember(lstart + strlen(function_mapper_str)); + else if (!strncmp(lstart, opindex_str, strlen(opindex_str))) + opindex = ScanMember(lstart + strlen(opindex_str)); + else if (!strncmp(lstart, immval_str, strlen(immval_str))) + immval = ScanMember(lstart + strlen(immval_str)); + else if (!strncmp(lstart, numops_str, strlen(numops_str))) + immval = ScanMember(lstart + strlen(numops_str)); + else if (!strncmp(lstart, immval_str_s, strlen(immval_str_s))) + immval = ScanMember(lstart + strlen(immval_str_s)); + else if (!strncmp(lstart, first_index_str, strlen(first_index_str))) + opindex = ScanMember(lstart + strlen(first_index_str)); + else if (!strncmp(lstart, second_index_str, strlen(second_index_str))) + immval = ScanMember(lstart + strlen(second_index_str)); + else if (!strncmp(lstart, predicates_str, strlen(predicates_str))) + predicates = ScanList(lstart + strlen(predicates_str)); + else if (!strncmp(lstart, predicate_str, strlen(predicate_str))) + predicates.push_back(ScanMember(lstart + strlen(predicate_str))); + else if (!strncmp(lstart, valid_opcodes_str, strlen(valid_opcodes_str))) + valid_opcodes = ScanList(lstart + strlen(valid_opcodes_str)); + else if (!strncmp(lstart, register_str, strlen(register_str))) + register_name = ScanMember(lstart + strlen(register_str)); + else if (!strncmp(lstart, statement_str, strlen(statement_str))) + statement = ScanMember(lstart + strlen(statement_str)); + } + + // Process predicate attributes on predicate definition line. 
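+  // "paren" points at the '{' of the definition line, which in the records
+  // dump is followed by the record's superclass list.  For example, a
+  // hypothetical definition line such as
+  //   def PredIsZeroImm {  // MCInstPredicate CheckImmOperand ...
+  // causes CheckImmOperand to be recorded as an attribute of this predicate.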
+ for (auto &attr : pred_attributes) + if (strstr(paren, attr.c_str())) + attributes.push_back(attr); + + assert(!mc_inst_predicates_.count(name) && "Unknown MCInstPredicate Record!"); + mc_inst_predicates_[name] = new MCInstPredicate( + name, function_name, function_mapper, opindex, immval, predicates, + valid_opcodes, register_name, attributes, statement); +} + +//------------------------------------------------------------------------- +// Read in entries associated with MCStatement objects. +//------------------------------------------------------------------------- +void MachineDescription::ScanMCStatement(std::ifstream &in, + const std::string &name, + const char *paren) { + std::string attribute; + std::string predicate; + std::vector cases; + std::string default_case; + + std::string input; + char *lstart; + + constexpr auto predicate_str = "MCInstPredicate Pred = "; + constexpr auto cases_str = "list Cases = ["; + constexpr auto default_case_str = "MCStatement DefaultCase = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, predicate_str, strlen(predicate_str))) + predicate = ScanMember(lstart + strlen(predicate_str)); + else if (!strncmp(lstart, cases_str, strlen(cases_str))) + cases = ScanList(lstart + strlen(cases_str)); + if (!strncmp(lstart, default_case_str, strlen(default_case_str))) + default_case = ScanMember(lstart + strlen(default_case_str)); + } + + // Note attributes on definition line. + if (strstr(paren, "MCReturnStatement")) + attribute = "ReturnStatement"; + if (strstr(paren, "MCOpcodeSwitchStatement")) + attribute = "OpcodeSwitch"; + + statements_[name] = + new MCStatement(name, attribute, predicate, cases, default_case); +} + +//------------------------------------------------------------------------- +// Read in entries associated with MCOpcodeSwitchCase objects. +//------------------------------------------------------------------------- +void MachineDescription::ScanMCOpcodeSwitchCase(std::ifstream &in, + const std::string &name) { + std::string attribute; + std::vector cases; + std::string case_stmt; + + std::string input; + char *lstart; + + constexpr auto cases_str = "list Opcodes = ["; + constexpr auto default_case_str = "MCStatement CaseStmt = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, cases_str, strlen(cases_str))) + cases = ScanList(lstart + strlen(cases_str)); + if (!strncmp(lstart, default_case_str, strlen(default_case_str))) + case_stmt = ScanMember(lstart + strlen(default_case_str)); + } + + switch_cases_[name] = new MCOpcodeSwitchCase(name, cases, case_stmt); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a SchedPredicate definition, looking for +// fields we're interested in: +// - the "Predicate" +// - the "SchedModel" +//------------------------------------------------------------------------- +void MachineDescription::ScanSchedPredicate(std::ifstream &in, + const std::string &name) { + std::string predicate; + std::string sched_model; + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. 
+ constexpr auto predicate_str = "code Predicate = "; + constexpr auto predicate_str_s = "string Predicate = "; + constexpr auto sched_model_str = "SchedMachineModel SchedModel = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, predicate_str, strlen(predicate_str))) + predicate = ScanCode(in, lstart + strlen(predicate_str)); + if (!strncmp(lstart, predicate_str_s, strlen(predicate_str_s))) + predicate = ScanCode(in, lstart + strlen(predicate_str_s)); + else if (!strncmp(lstart, sched_model_str, strlen(sched_model_str))) + sched_model = ScanFixedMember(lstart + strlen(sched_model_str)); + } + + assert(!IsSchedPredicate(name) && "Duplicate SchedPredicate Record!"); + sched_predicates_[name] = new SchedPredicate(name, sched_model, predicate); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a SchedAlias definition, looking for +// fields we're interested in: +// - the "MatchRW" +// - the "AliasRW" +// - the "SchedModel" +//------------------------------------------------------------------------- +void MachineDescription::ScanSchedAlias(std::ifstream &in, + const std::string &name) { + std::string match_rw; + std::string alias_rw; + std::string sched_model; + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto match_rw_str = "SchedReadWrite MatchRW = "; + constexpr auto alias_rw_str = "SchedReadWrite AliasRW = "; + constexpr auto sched_model_str = "SchedMachineModel SchedModel = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, match_rw_str, strlen(match_rw_str))) + match_rw = ScanMember(lstart + strlen(match_rw_str)); + if (!strncmp(lstart, alias_rw_str, strlen(alias_rw_str))) + alias_rw = ScanMember(lstart + strlen(alias_rw_str)); + else if (!strncmp(lstart, sched_model_str, strlen(sched_model_str))) + sched_model = ScanFixedMember(lstart + strlen(sched_model_str)); + } + + auto *item = new SchedAlias(name, sched_model, match_rw, alias_rw); + sched_alias_[name] = item; + sched_alias_matches_[match_rw][sched_model] = alias_rw; +} + +//------------------------------------------------------------------------- +// Read the entries associated with a WriteSequence definition, looking for +// fields we're interested in: +// - the "Writes" +// - the "Repeat" +// - the "SchedModel" +//------------------------------------------------------------------------- +void MachineDescription::ScanWriteSequence(std::ifstream &in, + const std::string &name) { + std::vector writes; + std::string repeat; + std::string sched_model; + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto writes_str = "list Writes = ["; + constexpr auto repeat_str = "int Repeat = "; + constexpr auto sched_model_str = "SchedMachineModel SchedModel = "; + + // Read subfield definitions and process the ones we're interested in. 
+ while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, writes_str, strlen(writes_str))) + writes = ScanList(lstart + strlen(writes_str)); + if (!strncmp(lstart, repeat_str, strlen(repeat_str))) + repeat = ScanMember(lstart + strlen(repeat_str)); + else if (!strncmp(lstart, sched_model_str, strlen(sched_model_str))) + sched_model = ScanFixedMember(lstart + strlen(sched_model_str)); + } + + assert(!write_sequences_.count(name) && "Duplicate WriteSequence Record!"); + write_sequences_[name] = + new WriteSequence(name, sched_model, writes, std::stoi(repeat)); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a ProcessorItineraries definition, looking +// for fields we're interested in: +// - the "FU" +// - the "BP" +// - the "IID" +//------------------------------------------------------------------------- +void MachineDescription::ScanProcessorItineraries(std::ifstream &in, + const std::string &name) { + StringSet resources; + std::vector bypass; + StringSet iid; + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto func_units_str = "list FU = ["; + constexpr auto bypass_str = "list BP = ["; + constexpr auto iid_str = "list IID = ["; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, func_units_str, strlen(func_units_str))) + resources = ScanSet(lstart + strlen(func_units_str)); + if (!strncmp(lstart, bypass_str, strlen(bypass_str))) + bypass = ScanList(lstart + strlen(bypass_str)); + else if (!strncmp(lstart, iid_str, strlen(iid_str))) + iid = ScanSet(lstart + strlen(iid_str)); + } + + assert(!processor_itineraries_.count(name) && + "Duplicate ProcessorItineraries Record!"); + processor_itineraries_[name] = + new ProcessorItineraries(name, resources, bypass, iid); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a InstrItinData definition, looking +// for fields we're interested in: +// - the "TheClass" +// - the "NumMicroOps" +// - the "Stages" +// - the "OperandCycles" +// - the "Bypasses" +//------------------------------------------------------------------------- +void MachineDescription::ScanInstrItineraryData(std::ifstream &in, + const std::string &name) { + std::string class_name(""); + std::string num_micro_ops(""); + std::vector stages; + std::vector operand_cycles; + std::vector bypasses; + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto instr_itinerary_class_str = "InstrItinClass TheClass = "; + constexpr auto num_micro_op_str = "int NumMicroOps = "; + constexpr auto stages_str = "list Stages = ["; + constexpr auto operand_cycles_str = "list OperandCycles = ["; + constexpr auto bypasses_str = "list Bypasses = ["; + + // Read subfield definitions and process the ones we're interested in. 
+ while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, instr_itinerary_class_str, + strlen(instr_itinerary_class_str))) + class_name = ScanMember(lstart + strlen(instr_itinerary_class_str)); + if (!strncmp(lstart, num_micro_op_str, strlen(num_micro_op_str))) + num_micro_ops = ScanMember(lstart + strlen(num_micro_op_str)); + else if (!strncmp(lstart, stages_str, strlen(stages_str))) + stages = ScanList(lstart + strlen(stages_str)); + else if (!strncmp(lstart, operand_cycles_str, strlen(operand_cycles_str))) + operand_cycles = ScanList(lstart + strlen(operand_cycles_str)); + else if (!strncmp(lstart, bypasses_str, strlen(bypasses_str))) + bypasses = ScanList(lstart + strlen(bypasses_str)); + } + + auto *item = + new InstrItineraryData(name, class_name, std::stoi(num_micro_ops), stages, + operand_cycles, bypasses); + instr_itinerary_data_[name] = item; + instr_itinerary_class_[class_name].push_back(item); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a InstrStage definition, looking +// for fields we're interested in: +// - the "Writes" +// - the "Repeat" +// - the "SchedModel" +//------------------------------------------------------------------------- +void MachineDescription::ScanInstrStage(std::ifstream &in, + const std::string &name) { + std::string cycles(""); + StringVec resources; + std::string timeinc(""); + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto cycles_str = "int Cycles = "; + constexpr auto func_units_str = "list Units = ["; + constexpr auto timeinc_str = "int TimeInc = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, cycles_str, strlen(cycles_str))) + cycles = ScanMember(lstart + strlen(cycles_str)); + if (!strncmp(lstart, func_units_str, strlen(func_units_str))) + resources = ScanList(lstart + strlen(func_units_str)); + if (!strncmp(lstart, timeinc_str, strlen(timeinc_str))) + timeinc = ScanMember(lstart + strlen(timeinc_str)); + } + + assert(!instr_stages_.count(name) && "Duplicate InstrStage Record!"); + instr_stages_[name] = + new InstrStage(name, std::stoi(cycles), resources, std::stoi(timeinc)); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a InstrItinClasses definition. +//------------------------------------------------------------------------- +void MachineDescription::ScanInstrItineraryClasses(std::ifstream &in, + const std::string &name) { + assert(!instr_itinerary_classes_.count(name) && + "Duplicate InstrItinClasses Record!"); + instr_itinerary_classes_.insert(name); +} + +//------------------------------------------------------------------------- +// Read the entries associated with a Bypass definition. +//------------------------------------------------------------------------- +void MachineDescription::ScanBypasses(std::ifstream &in, + const std::string &name) { + assert(!bypasses_.count(name) && "Duplicate Bypass Record!"); + bypasses_.insert(name); +} + +//------------------------------------------------------------------------- +// Read the entries associated with an instruction definition looking for +// fields we're interested in: +// - the "outs". +// - the "ins". +// - the assembly syntax. +// - the dag matching pattern. +// - the instruction's "parent". 
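+// Illustrative fragment of such a record (the opcode, operand types, and
+// target name are hypothetical):
+//   def ADDrr {  // Instruction ...
+//     dag OutOperandList = (outs GPR32:$dst);
+//     dag InOperandList = (ins GPR32:$src1, GPR32:$src2);
+//     string AsmString = "add $dst, $src1, $src2";
+//     string Namespace = "MyTarget";
+//     ...
+//   }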
+//------------------------------------------------------------------------- +void MachineDescription::ScanInstruction(std::ifstream &in, + const std::string &name, + const char *paren) { + std::vector outs; // Output operands. + std::vector ins; // Input operands. + std::string assy; // Assembly syntax. + std::string pat; // Matching pattern. + StringVec *subunit = nullptr; // Subunit identifier. + StringVec implicit_uses; // Implicit Uses. + StringVec implicit_defs; // Implicit Defs. + std::string parent; // Parent instruction. + std::string base_instr; // Base Instruction. + std::vector rw_units; // SchedRW Subunits. + std::string name_space; // Namespace attribute. + std::string itinerary; // InstrItinClass attribute. + bool pseudo = false; + bool load = false; + std::string input; + char *lstart; + + // These strings correspond to subfields we are interested in. + constexpr auto out_str = "dag OutOperandList = (outs"; + constexpr auto in_str = "dag InOperandList = (ins"; + constexpr auto assy_str = "string AsmString = "; + constexpr auto pat_str = "list Pattern = "; + constexpr auto parent_str = "Instruction Unbundled = "; + constexpr auto base_instr_str = "Instruction base_instr = "; + constexpr auto isPseudo = "bit isPseudo = 1"; + constexpr auto mayLoad = "bit mayLoad = 1"; + constexpr auto subunit_str = "list SubUnits = ["; + constexpr auto namespace__str = "string Namespace = "; + constexpr auto implicit_uses_str = "list Uses = ["; + constexpr auto implicit_defs_str = "list Defs = ["; + constexpr auto derived_subunits_str = "list SchedRW = ["; + constexpr auto itinerary_str = "InstrItinClass Itinerary = "; + + // Read subfield definitions and process the ones we're interested in. + while (in && (lstart = GetLine(in, input))) { + if (!strncmp(lstart, out_str, strlen(out_str))) + outs = ScanInstructOpnds(lstart + strlen(out_str)); + else if (!strncmp(lstart, in_str, strlen(in_str))) + ins = ScanInstructOpnds(lstart + strlen(in_str)); + else if (!strncmp(lstart, assy_str, strlen(assy_str))) + assy = ScanInstructAssy(lstart + strlen(assy_str)); + else if (!strncmp(lstart, pat_str, strlen(pat_str))) + pat = ScanInstructPattern(lstart + strlen(pat_str)); + else if (!strncmp(lstart, parent_str, strlen(parent_str))) + parent = ScanInstructParent(lstart + strlen(parent_str)); + else if (!strncmp(lstart, base_instr_str, strlen(base_instr_str))) + base_instr = ScanInstructParent(lstart + strlen(base_instr_str)); + else if (!strncmp(lstart, subunit_str, strlen(subunit_str))) + subunit = ScanInstructSubunit(lstart + strlen(subunit_str)); + else if (!strncmp(lstart, implicit_uses_str, strlen(implicit_uses_str))) + implicit_uses = ScanList(lstart + strlen(implicit_uses_str)); + else if (!strncmp(lstart, implicit_defs_str, strlen(implicit_defs_str))) + implicit_defs = ScanList(lstart + strlen(implicit_defs_str)); + else if (!strncmp(lstart, namespace__str, strlen(namespace__str))) + name_space = ScanName(lstart + strlen(namespace__str)); + else if (!strncmp(lstart, isPseudo, strlen(isPseudo))) + pseudo = true; + else if (!strncmp(lstart, mayLoad, strlen(mayLoad))) + load = true; + else if (!strncmp(lstart, derived_subunits_str, + strlen(derived_subunits_str))) + rw_units = ScanList(lstart + strlen(derived_subunits_str)); + else if (!strncmp(lstart, itinerary_str, strlen(itinerary_str))) + itinerary = ScanMember(lstart + strlen(itinerary_str)); + } + + // If instruction wasn't part of the current family, ignore. + // We also read all the StandardPseudoInstructions. 
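+  // For example, a record containing string Namespace = "RISCV" is kept only
+  // when scanning the RISCV family (the family name here is illustrative);
+  // records in the TargetOpcode namespace (the standard pseudo instructions)
+  // are always kept.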
+  if (name_space != family() && name_space != "TargetOpcode")
+    return;
+
+  //---------------------------------------------------------------------
+  // Create an instruction description object.
+  // If it's a base instruction, add it to the table.  If it's a child, add
+  // it to its parent's list of children.
+  //---------------------------------------------------------------------
+  auto inst =
+      new Instruction(name, outs, ins, assy, pat, parent, subunit, rw_units,
+                      implicit_uses, implicit_defs, base_instr, itinerary, load,
+                      pseudo, strstr(paren, "GenericInstruction"));
+  if (parent.empty())
+    instructions_[name] = inst;    // Duplicates ought to be impossible
+  else
+    instructions_[parent]->children_.push_back(inst);
+}
+
+//-------------------------------------------------------------------------
+// Just skip over class definitions (for now).
+//-------------------------------------------------------------------------
+void MachineDescription::ScanClass(std::ifstream &in, char *input) {
+  SkipRecord(in);
+}
+
+//-------------------------------------------------------------------------
+// In a def record, look for fields of interest.
+//---------------------------------------------------------------------
+// Process lines of the form:
+//      def <name> {
+//---------------------------------------------------------------------
+// We're looking for instruction definitions which:
+//    - have an attribute of "instruction" "Inst"
+//    - have a non-empty attribute AsmString
+//    - have "ins" and "outs"
+// For instructions, we want to collect:
+//    - the instruction name
+//    - the "outs"
+//    - the "ins"
+//    - the assembly syntax
+//    - the subunit identifier
+//    - ... and maybe some other stuff
+// We're also looking for register definitions, which
+//    - have an attribute of "Register" and "Reg"
+// We're also looking for non-trivial "operand" definitions, which:
+//    - have an attribute of "OperandDAG"
+//    - have a non-trivial OperandInfo entry
+//-------------------------------------------------------------------------
+void MachineDescription::ScanDef(std::ifstream &in, char *input) {
+  int name_start = 0;
+  int name_end = 0;
+
+  // Isolate the name of the object being defined.
+  for (name_start = 4; isspace(input[name_start]); name_start++) {
+  }
+  for (name_end = name_start; input[name_end] != ' '; name_end++) {
+  }
+  input[name_end] = 0;    // Terminate the name.
+
+  // Find the opening brace - if it's not there, we're confused.
+  char *paren = nullptr;
+  if (!(paren = strchr(&input[name_end + 1], '{'))) {
+    SkipRecord(in);
+    return;
+  }
+
+  // Is this a processor family instruction record?
+  if (strstr(paren, " Instruction "))
+    ScanInstruction(in, &input[name_start], paren);
+
+  // Is this an operand record?
+  else if (strstr(paren, " DAGOperand Operand"))
+    ScanOperand(in, &input[name_start]);
+
+  // Scan various register definitions.  Note: Scan plain "registers" last
+  // since they may only have the "Register" qualifier (and at the EOL).
+  // Is this a register-operand record?
+  else if (strstr(paren, " DAGOperand RegisterOperand"))
+    ScanRegisterOperand(in, &input[name_start]);
+
+  else if (strstr(paren, " RegisterClass"))
+    ScanRegisterClass(in, &input[name_start]);
+
+  else if (strstr(paren, " unknown_class"))
+    ScanUnknownRegisterClass(in, &input[name_start]);
+
+  // Is this a register record for our processor family?
+ else if (strstr(paren, " Register")) + ScanRegister(in, &input[name_start]); + + else if (strstr(paren, " SDPatternOperator SDNode")) + ScanPatternOperator(in, &input[name_start]); + + else if (strstr(paren, " ValueType")) + ScanValueType(in, &input[name_start]); + + else if (strstr(paren, " PointerLikeRegClass")) + ScanPointerLikeRegClass(in, &input[name_start]); + + //---------------------------------------------------------------------- + // If we're not extracting the entire architecture spec, skip other + // records. + //---------------------------------------------------------------------- + else if (!gen_arch_spec) + SkipRecord(in); + + else if (strstr(paren, " Processor Proc") || + (strstr(paren, " Processor") && + !strstr(paren, " ProcessorItineraries"))) + ScanProcessorModel(in); + + else if (strstr(paren, " SchedMachineModel")) + ScanSchedMachineModel(in, &input[name_start]); + + else if (strstr(paren, " ProcResourceKind Proc")) + ScanProcResource(in, &input[name_start], paren); + + else if (strstr(paren, " FuncUnit")) + ScanProcResource(in, &input[name_start], paren); + + else if (strstr(paren, " ComboFuncData")) + ScanComboFuncData(in); + + else if (strstr(paren, " InstRW")) + ScanInstRW(in, &input[name_start]); + + else if (strstr(paren, " ItinRW")) + ScanItinRW(in, &input[name_start]); + + //--------------------------------------------------------------------- + // Process all the SchedReadWrite resource definitions. Process + // SchedRead and SchedWrite records last, to catch all the qualified + // versions first. + //--------------------------------------------------------------------- + else if (strstr(paren, " ProcWriteResources")) + ScanSchedReadWrite(in, &input[name_start], paren, true); + + else if (strstr(paren, " ProcReadAdvance")) + ScanSchedReadWrite(in, &input[name_start], paren, false); + + else if (strstr(paren, " WriteSequence")) + ScanWriteSequence(in, &input[name_start]); + + else if (strstr(paren, " SchedVariant")) + ScanSchedVariant(in, &input[name_start], paren); + + else if (strstr(paren, " SchedVar")) + ScanSchedVar(in, &input[name_start]); + + else if (strstr(paren, " SchedReadWrite SchedWrite")) + ScanSchedReadWrite(in, &input[name_start], paren, true); + + else if (strstr(paren, " SchedReadWrite SchedRead")) + ScanSchedReadWrite(in, &input[name_start], paren, false); + + else if (strstr(paren, " SchedAlias")) + ScanSchedAlias(in, &input[name_start]); + + //--------------------------------------------------------------------- + // Process predicate-related records. + //--------------------------------------------------------------------- + else if (strstr(paren, " SchedPredicateBase SchedPredicate")) + ScanSchedPredicate(in, &input[name_start]); + + else if (strstr(paren, " MCSchedPredicate")) + ScanMCSchedPredicate(in, &input[name_start]); + + else if (strstr(paren, " MCInstPredicate")) + ScanMCInstPredicate(in, &input[name_start], paren); + + else if (strstr(paren, " MCStatement")) + ScanMCStatement(in, &input[name_start], paren); + + else if (strstr(paren, " MCOpcodeSwitchCase")) + ScanMCOpcodeSwitchCase(in, &input[name_start]); + + //--------------------------------------------------------------------- + // Process Itinerary records. 
+ //--------------------------------------------------------------------- + else if (strstr(paren, " ProcessorItineraries")) + ScanProcessorItineraries(in, &input[name_start]); + + else if (strstr(paren, " InstrItinData")) + ScanInstrItineraryData(in, &input[name_start]); + + else if (strstr(paren, " InstrStage")) + ScanInstrStage(in, &input[name_start]); + + else if (strstr(paren, " Bypass")) + ScanBypasses(in, &input[name_start]); + + else if (strstr(paren, " InstrItinClass")) + ScanInstrItineraryClasses(in, &input[name_start]); + + else + SkipRecord(in); // Nothing interesting, skip rest of record. +} + +//------------------------------------------------------------------------- +// Open the file, scan all the lines and add to machine description. +// +// The "records" file is a flattened version of the td file for a cpu, and +// contains records for each instruction and register (and other things). +// +// The file contains "class" descriptors and "def" descriptors: +// ------------- Classes ---------------- +// class { +// ... +// } +// ... +// ------------- Defs ---------------- +// def { +// ... +// } +// ... +//------------------------------------------------------------------------- +void MachineDescription::ScanFile(const char *file) { + std::ifstream in(file); + if (!in) { + std::cerr << "Error: input file \"" << file << "\" not found" << std::endl; + exit(EXIT_FAILURE); + } + + while (in) { + std::string line; + char *input = GetLine(in, line); + if (input == nullptr) + continue; + if (!in) + return; + + if (strncmp(input, "def", strlen("def")) == 0) + ScanDef(in, input); + else if (strncmp(input, "class", strlen("class")) == 0) + ScanClass(in, input); + else if (strncmp(input, "--", 2) == 0) // Skip comment lines. + continue; + else + std::cerr << "Unrecognized line:" << input << std::endl; + } + + in.close(); +} + +} // namespace scan +} // namespace mdl +} // namespace mpact