diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h
--- a/llvm/include/llvm/MC/MCSchedule.h
+++ b/llvm/include/llvm/MC/MCSchedule.h
@@ -108,15 +108,16 @@
 ///
 /// Defined as an aggregate struct for creating tables with initializer lists.
 struct MCSchedClassDesc {
-  static const unsigned short InvalidNumMicroOps = (1U << 14) - 1;
+  static const unsigned short InvalidNumMicroOps = (1U << 13) - 1;
   static const unsigned short VariantNumMicroOps = InvalidNumMicroOps - 1;
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   const char* Name;
 #endif
-  uint16_t NumMicroOps : 14;
+  uint16_t NumMicroOps : 13;
   bool     BeginGroup : 1;
   bool     EndGroup : 1;
+  bool     RetireOOO : 1;
   uint16_t WriteProcResIdx; // First index into WriteProcResTable.
   uint16_t NumWriteProcResEntries;
   uint16_t WriteLatencyIdx; // First index into WriteLatencyTable.
diff --git a/llvm/include/llvm/MCA/Context.h b/llvm/include/llvm/MCA/Context.h
--- a/llvm/include/llvm/MCA/Context.h
+++ b/llvm/include/llvm/MCA/Context.h
@@ -68,6 +68,11 @@
   /// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages.
   std::unique_ptr<Pipeline> createDefaultPipeline(const PipelineOptions &Opts,
                                                   SourceMgr &SrcMgr);
+
+  /// Construct a basic pipeline for simulating an in-order pipeline.
+  /// This pipeline consists of Fetch, InOrderIssue, and Retire stages.
+  std::unique_ptr<Pipeline> createInOrderPipeline(const PipelineOptions &Opts,
+                                                  SourceMgr &SrcMgr);
 };
 
 } // namespace mca
diff --git a/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h b/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
--- a/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
@@ -172,11 +172,6 @@
   void freePhysRegs(const RegisterRenamingInfo &Entry,
                     MutableArrayRef<unsigned> FreedPhysRegs);
 
-  // Collects writes that are in a RAW dependency with RS.
-  // This method is called from `addRegisterRead()`.
-  void collectWrites(const ReadState &RS,
-                     SmallVectorImpl<WriteRef> &Writes) const;
-
   // Create an instance of RegisterMappingTracker for every register file
   // specified by the processor model.
   // If no register file is specified, then this method creates a default
@@ -187,6 +182,10 @@
   RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
                unsigned NumRegs = 0);
 
+  // Collects writes that are in a RAW dependency with RS.
+  void collectWrites(const ReadState &RS,
+                     SmallVectorImpl<WriteRef> &Writes) const;
+
   // This method updates the register mappings inserting a new register
   // definition. This method is also responsible for updating the number of
   // allocated physical registers in each register file modified by the write.
diff --git a/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h b/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
--- a/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
@@ -104,6 +104,9 @@
 #ifndef NDEBUG
   void dump() const;
 #endif
+
+  // Assigned to instructions that are not handled by the RCU.
+  static const unsigned UnhandledTokenID = ~0U;
 };
 
 } // namespace mca
diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -375,6 +375,7 @@
   bool HasSideEffects;
   bool BeginGroup;
   bool EndGroup;
+  bool RetireOOO;
 
   // True if all buffered resources are in-order, and there is at least one
   // buffer which is a dispatch hazard (BufferSize = 0).
diff --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
@@ -0,0 +1,84 @@
+//===---------------------- InOrderIssueStage.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// InOrderIssueStage implements an in-order execution pipeline.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_IN_ORDER_ISSUE_STAGE_H
+#define LLVM_MCA_IN_ORDER_ISSUE_STAGE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/SourceMgr.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+#include <queue>
+
+namespace llvm {
+struct MCSchedModel;
+class MCSubtargetInfo;
+
+namespace mca {
+class RegisterFile;
+class ResourceManager;
+struct RetireControlUnit;
+
+class InOrderIssueStage final : public Stage {
+  const MCSchedModel &SM;
+  const MCSubtargetInfo &STI;
+  RetireControlUnit &RCU;
+  RegisterFile &PRF;
+  std::unique_ptr<ResourceManager> RM;
+
+  /// Instructions that were issued, but not executed yet.
+  SmallVector<InstRef, 4> IssuedInst;
+
+  /// Number of instructions issued in the current cycle.
+  unsigned NumIssued;
+
+  /// If an instruction cannot execute due to an unmet register or resource
+  /// dependency, then it is stalled for StallCyclesLeft cycles.
+  InstRef StalledInst;
+  unsigned StallCyclesLeft;
+
+  /// Remaining issue bandwidth for the current cycle (consumed in micro-ops).
+  unsigned Bandwidth;
+
+  InOrderIssueStage(const InOrderIssueStage &Other) = delete;
+  InOrderIssueStage &operator=(const InOrderIssueStage &Other) = delete;
+
+  /// If IR has an unmet register or resource dependency, canExecute returns
+  /// false. StallCycles is set to the number of cycles left before the
+  /// instruction can be issued (0 when IR can be issued right away).
+  bool canExecute(const InstRef &IR, unsigned *StallCycles) const;
+
+  /// Issue the instruction, or update StallCycles if IR is stalled.
+  Error tryIssue(InstRef &IR, unsigned *StallCycles);
+
+  /// Update status of instructions from IssuedInst.
+  Error updateIssuedInst();
+
+public:
+  InOrderIssueStage(RetireControlUnit &RCU, RegisterFile &PRF,
+                    const MCSchedModel &SM, const MCSubtargetInfo &STI)
+      : SM(SM), STI(STI), RCU(RCU), PRF(PRF),
+        RM(std::make_unique<ResourceManager>(SM)), NumIssued(0),
+        StallCyclesLeft(0), Bandwidth(0) {}
+
+  bool isAvailable(const InstRef &) const override;
+  bool hasWorkToComplete() const override;
+  Error execute(InstRef &IR) override;
+  Error cycleStart() override;
+  Error cycleEnd() override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_IN_ORDER_ISSUE_STAGE_H
diff --git a/llvm/include/llvm/MCA/Stages/RetireStage.h b/llvm/include/llvm/MCA/Stages/RetireStage.h
--- a/llvm/include/llvm/MCA/Stages/RetireStage.h
+++ b/llvm/include/llvm/MCA/Stages/RetireStage.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_MCA_RETIRE_STAGE_H
 #define LLVM_MCA_RETIRE_STAGE_H
 
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/MCA/HardwareUnits/LSUnit.h"
 #include "llvm/MCA/HardwareUnits/RegisterFile.h"
 #include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
@@ -29,6 +30,7 @@
   RetireControlUnit &RCU;
   RegisterFile &PRF;
   LSUnitBase &LSU;
+  SmallVector<InstRef, 4> RetireInst;
 
   RetireStage(const RetireStage &Other) = delete;
   RetireStage &operator=(const RetireStage &Other) = delete;
@@ -37,7 +39,9 @@
   RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS)
       : Stage(), RCU(R), PRF(F), LSU(LS) {}
 
-  bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
+  bool hasWorkToComplete() const override {
+    return !RCU.isEmpty() || !RetireInst.empty();
+  }
   Error cycleStart() override;
   Error execute(InstRef &IR) override;
   void notifyInstructionRetired(const InstRef &IR) const;
diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td
--- a/llvm/include/llvm/Target/TargetSchedule.td
+++ b/llvm/include/llvm/Target/TargetSchedule.td
@@ -262,6 +262,10 @@
   // Allow a processor to mark some scheduling classes as single-issue.
   // SingleIssue is an alias for Begin/End Group.
   bit SingleIssue = false;
+  // An instruction is allowed to retire out-of-order if RetireOOO is
+  // true for at least one of its writes. This field is only used by
+  // MCA for in-order subtargets, and is ignored for other targets.
+  bit RetireOOO = false;
   SchedMachineModel SchedModel = ?;
 }
 
diff --git a/llvm/lib/MCA/CMakeLists.txt b/llvm/lib/MCA/CMakeLists.txt
--- a/llvm/lib/MCA/CMakeLists.txt
+++ b/llvm/lib/MCA/CMakeLists.txt
@@ -14,6 +14,7 @@
   Stages/DispatchStage.cpp
   Stages/EntryStage.cpp
   Stages/ExecuteStage.cpp
+  Stages/InOrderIssueStage.cpp
   Stages/InstructionTables.cpp
   Stages/MicroOpQueueStage.cpp
   Stages/RetireStage.cpp
diff --git a/llvm/lib/MCA/Context.cpp b/llvm/lib/MCA/Context.cpp
--- a/llvm/lib/MCA/Context.cpp
+++ b/llvm/lib/MCA/Context.cpp
@@ -21,6 +21,7 @@
 #include "llvm/MCA/Stages/DispatchStage.h"
 #include "llvm/MCA/Stages/EntryStage.h"
 #include "llvm/MCA/Stages/ExecuteStage.h"
+#include "llvm/MCA/Stages/InOrderIssueStage.h"
 #include "llvm/MCA/Stages/MicroOpQueueStage.h"
 #include "llvm/MCA/Stages/RetireStage.h"
 
@@ -31,6 +32,9 @@
 Context::createDefaultPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) {
   const MCSchedModel &SM = STI.getSchedModel();
 
+  if (!SM.isOutOfOrder())
+    return createInOrderPipeline(Opts, SrcMgr);
+
   // Create the hardware units defining the backend.
   auto RCU = std::make_unique<RetireControlUnit>(SM);
   auto PRF = std::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
@@ -64,5 +68,29 @@
   return StagePipeline;
 }
 
+std::unique_ptr<Pipeline>
+Context::createInOrderPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) {
+  const MCSchedModel &SM = STI.getSchedModel();
+  // Hardware units shared by the stages; handed to addHardwareUnit() below.
+  std::unique_ptr<RetireControlUnit> RCU =
+      std::make_unique<RetireControlUnit>(SM);
+  std::unique_ptr<RegisterFile> PRF =
+      std::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
+  std::unique_ptr<LSUnit> LSU = std::make_unique<LSUnit>(
+      SM, Opts.LoadQueueSize, Opts.StoreQueueSize, Opts.AssumeNoAlias);
+
+  // Stage order: Entry -> InOrderIssue -> Retire.
+  auto StagePipeline = std::make_unique<Pipeline>();
+  StagePipeline->appendStage(std::make_unique<EntryStage>(SrcMgr));
+  StagePipeline->appendStage(
+      std::make_unique<InOrderIssueStage>(*RCU, *PRF, SM, STI));
+  StagePipeline->appendStage(std::make_unique<RetireStage>(*RCU, *PRF, *LSU));
+
+  // The stages only hold references, so the units are registered afterwards.
+  addHardwareUnit(std::move(RCU));
+  addHardwareUnit(std::move(PRF));
+  addHardwareUnit(std::move(LSU));
+  return StagePipeline;
+}
+
 } // namespace mca
 } // namespace llvm
diff --git a/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
--- a/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
+++ b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
@@ -33,12 +33,18 @@
     MaxRetirePerCycle = EPI.MaxRetirePerCycle;
   }
   NumROBEntries = AvailableEntries;
+  bool IsOutOfOrder = SM.MicroOpBufferSize;
+  if (!IsOutOfOrder && !NumROBEntries)
+    return;
   assert(NumROBEntries && "Invalid reorder buffer size!");
   Queue.resize(2 * NumROBEntries);
 }
 
 // Reserves a number of slots, and returns a new token.
 unsigned RetireControlUnit::dispatch(const InstRef &IR) {
+  if (!NumROBEntries)
+    return UnhandledTokenID;
+
   const Instruction &Inst = *IR.getInstruction();
   unsigned Entries = normalizeQuantity(Inst.getNumMicroOps());
   assert((AvailableEntries >= Entries) && "Reorder Buffer unavailable!");
@@ -47,6 +53,7 @@
   Queue[NextAvailableSlotIdx] = {IR, Entries, false};
   NextAvailableSlotIdx += std::max(1U, Entries);
   NextAvailableSlotIdx %= Queue.size();
+  assert(TokenID < UnhandledTokenID && "Invalid token ID");
 
   AvailableEntries -= Entries;
   return TokenID;
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -570,6 +570,7 @@
   ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
   ID->BeginGroup = SCDesc.BeginGroup;
   ID->EndGroup = SCDesc.EndGroup;
+  ID->RetireOOO = SCDesc.RetireOOO;
 
   initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
   computeMaxLatency(*ID, MCDesc, SCDesc, STI);
diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
@@ -0,0 +1,288 @@
+//===---------------------- InOrderIssueStage.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// InOrderIssueStage implements an in-order execution pipeline.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/InOrderIssueStage.h"
+
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/ResourceManager.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+
+#include <algorithm>
+
+#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
+namespace mca {
+
+bool InOrderIssueStage::hasWorkToComplete() const {
+  return StalledInst || !IssuedInst.empty(); // Stalled or in-flight work left.
+}
+
+bool InOrderIssueStage::isAvailable(const InstRef &IR) const {
+  // IR is accepted in this cycle only when the remaining bandwidth covers
+  // all of its micro-ops and its BeginGroup constraint (if any) is honored.
+  const Instruction &IS = *IR.getInstruction();
+
+  if (IS.getNumMicroOps() > Bandwidth)
+    return false;
+
+  // Instruction with BeginGroup must be the first instruction to be issued
+  // in a cycle.
+  const bool IsFirstInCycle = NumIssued == 0;
+  const bool NeedsNewGroup = IS.getDesc().BeginGroup;
+
+  return IsFirstInCycle || !NeedsNewGroup;
+}
+
+/// Returns true if RM.checkAvailability() flags IR's descriptor (a stall).
+static bool hasResourceHazard(const ResourceManager &RM, const InstRef &IR) {
+  const InstrDesc &Desc = IR.getInstruction()->getDesc();
+  const bool IsBlocked = RM.checkAvailability(Desc) != 0;
+  if (IsBlocked)
+    LLVM_DEBUG(dbgs() << "[E] Stall #" << IR << '\n');
+  return IsBlocked;
+}
+
+/// Return the number of cycles left until the register requirements of IR are
+/// met, i.e. the longest remaining wait over every write that one of IR's
+/// reads depends on. Returns 0 when there is no register hazard.
+static unsigned checkRegisterHazard(const RegisterFile &PRF,
+                                    const MCSchedModel &SM,
+                                    const MCSubtargetInfo &STI,
+                                    const InstRef &IR) {
+  unsigned StallCycles = 0;
+  SmallVector<WriteRef, 4> Writes;
+
+  for (const ReadState &RS : IR.getInstruction()->getUses()) {
+    const ReadDescriptor &RD = RS.getDescriptor();
+    const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
+
+    PRF.collectWrites(RS, Writes);
+    for (const WriteRef &WR : Writes) {
+      const WriteState *WS = WR.getWriteState();
+      unsigned WriteResID = WS->getWriteResourceID();
+      int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
+      LLVM_DEBUG(dbgs() << "[E] ReadAdvance for #" << IR << ": " << ReadAdvance
+                        << '\n');
+      // Compare as signed values: a negative ReadAdvance must lengthen the
+      // stall instead of wrapping around to a huge unsigned number.
+      assert(WS->getCyclesLeft() != UNKNOWN_CYCLES);
+      int CyclesLeft = WS->getCyclesLeft();
+      if (CyclesLeft > ReadAdvance) {
+        LLVM_DEBUG(dbgs() << "[E] Register hazard: " << WS->getRegisterID()
+                          << '\n');
+        StallCycles = std::max<unsigned>(StallCycles, CyclesLeft - ReadAdvance);
+      }
+    }
+    Writes.clear();
+  }
+  return StallCycles;
+}
+
+bool InOrderIssueStage::canExecute(const InstRef &IR,
+                                   unsigned *StallCycles) const {
+  *StallCycles = 0;
+  const unsigned RegStall = checkRegisterHazard(PRF, SM, STI, IR);
+  if (RegStall != 0) {
+    *StallCycles = RegStall;
+    // FIXME: add a parameter to HWStallEvent to indicate a number of cycles.
+    for (unsigned Left = RegStall; Left != 0; --Left) {
+      notifyEvent<HWStallEvent>(
+          HWStallEvent(HWStallEvent::RegisterFileStall, IR));
+      notifyEvent<HWPressureEvent>(
+          HWPressureEvent(HWPressureEvent::REGISTER_DEPS, IR));
+    }
+    return false;
+  }
+  // Resources are only checked once every register operand is ready.
+  if (!hasResourceHazard(*RM, IR))
+    return true;
+  *StallCycles = 1;
+  notifyEvent<HWStallEvent>(HWStallEvent(HWStallEvent::DispatchGroupStall, IR));
+  notifyEvent<HWPressureEvent>(HWPressureEvent(HWPressureEvent::RESOURCES, IR));
+  return false;
+}
+
+/// Registers every read of IS, then every write, with the register file.
+static void addRegisterReadWrite(RegisterFile &PRF, Instruction &IS,
+                                 unsigned SourceIndex,
+                                 const MCSubtargetInfo &STI,
+                                 SmallVectorImpl<unsigned> &UsedRegs) {
+  assert(!IS.isEliminated());
+  for (ReadState &Use : IS.getUses())
+    PRF.addRegisterRead(Use, STI);
+  // Writes are added only after all reads have been registered.
+  for (WriteState &Def : IS.getDefs())
+    PRF.addRegisterWrite(WriteRef(SourceIndex, &Def), UsedRegs);
+}
+
+/// Notifies the listeners of S that IR became ready and was then issued.
+static void notifyInstructionExecute(
+    const InstRef &IR,
+    const SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedRes,
+    const Stage &S) {
+  const HWInstructionEvent ReadyEvent(HWInstructionEvent::Ready, IR);
+  S.notifyEvent<HWInstructionEvent>(ReadyEvent);
+  const HWInstructionIssuedEvent IssuedEvent(IR, UsedRes);
+  S.notifyEvent<HWInstructionEvent>(IssuedEvent);
+  LLVM_DEBUG(dbgs() << "[E] Issued #" << IR << "\n");
+}
+
+/// Notifies the listeners of S that IR was dispatched (UsedRegs, Ops attached).
+static void notifyInstructionDispatch(const InstRef &IR, unsigned Ops,
+                                      const SmallVectorImpl<unsigned> &UsedRegs,
+                                      const Stage &S) {
+  const HWInstructionDispatchedEvent DispatchedEvent(IR, UsedRegs, Ops);
+  S.notifyEvent<HWInstructionEvent>(DispatchedEvent);
+
+  LLVM_DEBUG(dbgs() << "[E] Dispatched #" << IR << "\n");
+}
+
+llvm::Error InOrderIssueStage::execute(InstRef &IR) {
+  Instruction &Inst = *IR.getInstruction();
+  const InstrDesc &D = Inst.getDesc();
+
+  // RetireOOO instructions bypass the RCU and keep UnhandledTokenID.
+  unsigned TokenID = RetireControlUnit::UnhandledTokenID;
+  if (!D.RetireOOO)
+    TokenID = RCU.dispatch(IR);
+  Inst.dispatch(TokenID);
+
+  // EndGroup closes the issue group: no bandwidth is left for this cycle.
+  if (!D.EndGroup) {
+    const unsigned NumMicroOps = Inst.getNumMicroOps();
+    assert(Bandwidth >= NumMicroOps);
+    Bandwidth -= NumMicroOps;
+  } else {
+    Bandwidth = 0;
+  }
+
+  if (llvm::Error Err = tryIssue(IR, &StallCyclesLeft))
+    return Err;
+  if (StallCyclesLeft != 0) {
+    StalledInst = IR;
+    Bandwidth = 0;
+  }
+  return llvm::ErrorSuccess();
+}
+
+llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) {
+  // canExecute() fills *StallCycles; a non-zero value means IR must wait.
+  if (!canExecute(IR, StallCycles)) {
+    LLVM_DEBUG(dbgs() << "[E] Stalled #" << IR << " for " << *StallCycles
+                      << " cycles\n");
+    return llvm::ErrorSuccess();
+  }
+
+  Instruction &Inst = *IR.getInstruction();
+  const InstrDesc &Desc = Inst.getDesc();
+  const unsigned SrcIdx = IR.getSourceIndex();
+
+  // Dispatch: update the register file and report the registers used.
+  SmallVector<unsigned, 4> RegsPerFile(PRF.getNumRegisterFiles());
+  addRegisterReadWrite(PRF, Inst, SrcIdx, STI, RegsPerFile);
+  notifyInstructionDispatch(IR, Desc.NumMicroOps, RegsPerFile, *this);
+
+  // Issue: hand the instruction to the resource manager and start executing.
+  SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> Consumed;
+  RM->issueInstruction(Desc, Consumed);
+  Inst.execute(SrcIdx);
+
+  // Replace resource masks with valid resource processor IDs.
+  for (std::pair<ResourceRef, ResourceCycles> &Entry : Consumed)
+    Entry.first.first = RM->resolveResourceMask(Entry.first.first);
+  notifyInstructionExecute(IR, Consumed, *this);
+
+  IssuedInst.push_back(IR);
+  ++NumIssued;
+  return llvm::ErrorSuccess();
+}
+
+llvm::Error InOrderIssueStage::updateIssuedInst() {
+  // Advance every issued instruction by one cycle. Instructions that finish
+  // executing are swapped to the tail of IssuedInst, so that they can be
+  // moved to the next stage and dropped from the list in one go.
+  unsigned NumDone = 0;
+  auto Cur = IssuedInst.begin();
+  const auto End = IssuedInst.end();
+  while (Cur != End - NumDone) {
+    InstRef &IR = *Cur;
+    Instruction &Inst = *IR.getInstruction();
+
+    Inst.cycleEvent();
+    if (Inst.isExecuted()) {
+      notifyEvent<HWInstructionEvent>(
+          HWInstructionEvent(HWInstructionEvent::Executed, IR));
+      LLVM_DEBUG(dbgs() << "[E] Instruction #" << IR << " is executed\n");
+      // Cur stays put: the element swapped in has not been visited yet.
+      ++NumDone;
+      std::iter_swap(Cur, End - NumDone);
+      continue;
+    }
+
+    LLVM_DEBUG(dbgs() << "[E] Instruction #" << IR
+                      << " is still executing\n");
+    ++Cur;
+  }
+
+  // Executed instructions are handed over to the next stage.
+  if (NumDone == 0)
+    return llvm::ErrorSuccess();
+  for (auto Done = End - NumDone; Done != End; ++Done)
+    if (llvm::Error Err = moveToTheNextStage(*Done))
+      return Err;
+  IssuedInst.resize(IssuedInst.size() - NumDone);
+  return llvm::ErrorSuccess();
+}
+
+llvm::Error InOrderIssueStage::cycleStart() {
+  NumIssued = 0;
+
+  // Release consumed resources; the freed list itself is unused here.
+  SmallVector<ResourceRef, 4> FreedResources;
+  RM->cycleEvent(FreedResources);
+
+  if (llvm::Error Err = updateIssuedInst())
+    return Err;
+
+  // Retry the stalled instruction once its stall counter has expired.
+  if (StalledInst && StallCyclesLeft == 0) {
+    if (llvm::Error Err = tryIssue(StalledInst, &StallCyclesLeft))
+      return Err;
+  }
+
+  if (StallCyclesLeft != 0) {
+    // The instruction is still stalled, cannot issue any new instructions in
+    // this cycle.
+    Bandwidth = 0;
+    return llvm::ErrorSuccess();
+  }
+
+  StalledInst.invalidate();
+  assert(NumIssued <= SM.IssueWidth && "Overflow.");
+  Bandwidth = SM.IssueWidth - NumIssued;
+  return llvm::ErrorSuccess();
+}
+
+llvm::Error InOrderIssueStage::cycleEnd() {
+  // One stall cycle has elapsed; the counter never goes below zero.
+  StallCyclesLeft -= (StallCyclesLeft != 0) ? 1 : 0;
+  return llvm::ErrorSuccess();
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/llvm/lib/MCA/Stages/RetireStage.cpp b/llvm/lib/MCA/Stages/RetireStage.cpp
--- a/llvm/lib/MCA/Stages/RetireStage.cpp
+++ b/llvm/lib/MCA/Stages/RetireStage.cpp
@@ -23,9 +23,6 @@
 namespace mca {
 
 llvm::Error RetireStage::cycleStart() {
-  if (RCU.isEmpty())
-    return llvm::ErrorSuccess();
-
   const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle();
   unsigned NumRetired = 0;
   while (!RCU.isEmpty()) {
@@ -39,11 +36,26 @@
     NumRetired++;
   }
 
+  // Retire instructions that are not controlled by the RCU
+  for (InstRef &IR : RetireInst) {
+    IR.getInstruction()->retire();
+    notifyInstructionRetired(IR);
+  }
+  RetireInst.resize(0);
+
   return llvm::ErrorSuccess();
 }
 
 llvm::Error RetireStage::execute(InstRef &IR) {
-  RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID());
+  Instruction &IS = *IR.getInstruction();
+
+  unsigned TokenID = IS.getRCUTokenID();
+  if (TokenID != RetireControlUnit::UnhandledTokenID) {
+    RCU.onInstructionExecuted(TokenID);
+    return llvm::ErrorSuccess();
+  }
+
+  RetireInst.push_back(IR);
   return llvm::ErrorSuccess();
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -151,6 +151,8 @@
 
 // FP Mul, Div, Sqrt. Div/Sqrt are not pipelined
 def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; }
+
+let RetireOOO = 1 in {
 def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22;
                                             let ResourceCycles = [29]; }
 def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; }
@@ -166,7 +168,7 @@
                                                       let ResourceCycles = [9]; }
 def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
                                                       let ResourceCycles = [19]; }
-
+}
 //===----------------------------------------------------------------------===//
 // Subtarget-specific SchedRead types.
 
@@ -336,4 +338,6 @@
 def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
 def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
 def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+
+def A55RCU : RetireControlUnit<64, 0>;
 }
diff --git a/llvm/test/TableGen/InvalidMCSchedClassDesc.td b/llvm/test/TableGen/InvalidMCSchedClassDesc.td
--- a/llvm/test/TableGen/InvalidMCSchedClassDesc.td
+++ b/llvm/test/TableGen/InvalidMCSchedClassDesc.td
@@ -19,7 +19,7 @@
 // Inst_B didn't have the resoures, and it is invalid.
 // CHECK: SchedModel_ASchedClasses[] = {
 // CHECK: {DBGFIELD("Inst_A")             1
-// CHECK-NEXT: {DBGFIELD("Inst_B")             16383 
+// CHECK-NEXT: {DBGFIELD("Inst_B")             8191
 let SchedModel = SchedModel_A in {
   def Write_A : SchedWriteRes<[]>;
   def : InstRW<[Write_A], (instrs Inst_A)>;
@@ -27,7 +27,7 @@
 
 // Inst_A didn't have the resoures, and it is invalid.
 // CHECK: SchedModel_BSchedClasses[] = {
-// CHECK: {DBGFIELD("Inst_A")             16383 
+// CHECK: {DBGFIELD("Inst_A")             8191
 // CHECK-NEXT: {DBGFIELD("Inst_B")             1 
 let SchedModel = SchedModel_B in {
   def Write_B: SchedWriteRes<[]>; 
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s
@@ -0,0 +1,81 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a55 --timeline --iterations=2 < %s | FileCheck %s
+
+add      w2, w3, #1
+add      w4, w3, #2, lsl #12
+add      w0, w4, #3
+add      w1, w0, #4
+
+# CHECK:      Iterations:        2
+# CHECK-NEXT: Instructions:      8
+# CHECK-NEXT: Total Cycles:      10
+# CHECK-NEXT: Total uOps:        8
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    0.80
+# CHECK-NEXT: IPC:               0.80
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      3     0.50                        add	w2, w3, #1
+# CHECK-NEXT:  1      3     0.50                        add	w4, w3, #2, lsl #12
+# CHECK-NEXT:  1      3     0.50                        add	w0, w4, #3
+# CHECK-NEXT:  1      3     0.50                        add	w1, w0, #4
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - CortexA55UnitALU
+# CHECK-NEXT: [0.1] - CortexA55UnitALU
+# CHECK-NEXT: [1]   - CortexA55UnitB
+# CHECK-NEXT: [2]   - CortexA55UnitDiv
+# CHECK-NEXT: [3.0] - CortexA55UnitFPALU
+# CHECK-NEXT: [3.1] - CortexA55UnitFPALU
+# CHECK-NEXT: [4]   - CortexA55UnitFPDIV
+# CHECK-NEXT: [5.0] - CortexA55UnitFPMAC
+# CHECK-NEXT: [5.1] - CortexA55UnitFPMAC
+# CHECK-NEXT: [6]   - CortexA55UnitLd
+# CHECK-NEXT: [7]   - CortexA55UnitMAC
+# CHECK-NEXT: [8]   - CortexA55UnitSt
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]
+# CHECK-NEXT: 2.00   2.00    -      -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]    Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -     add	w2, w3, #1
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -     add	w4, w3, #2, lsl #12
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -     add	w0, w4, #3
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -     add	w1, w0, #4
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeER.   .   add	w2, w3, #1
+# CHECK-NEXT: [0,1]     DeeER.   .   add	w4, w3, #2, lsl #12
+# CHECK-NEXT: [0,2]     .DeeER   .   add	w0, w4, #3
+# CHECK-NEXT: [0,3]     . DeeER  .   add	w1, w0, #4
+# CHECK-NEXT: [1,0]     . DeeER  .   add	w2, w3, #1
+# CHECK-NEXT: [1,1]     .  DeeER .   add	w4, w3, #2, lsl #12
+# CHECK-NEXT: [1,2]     .   DeeER.   add	w0, w4, #3
+# CHECK-NEXT: [1,3]     .    DeeER   add	w1, w0, #4
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     2     0.0    0.0    0.0       add	w2, w3, #1
+# CHECK-NEXT: 1.     2     0.0    0.0    0.0       add	w4, w3, #2, lsl #12
+# CHECK-NEXT: 2.     2     0.0    0.0    0.0       add	w0, w4, #3
+# CHECK-NEXT: 3.     2     0.0    0.0    0.0       add	w1, w0, #4
+# CHECK-NEXT:        2     0.0    0.0    0.0       <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
@@ -0,0 +1,100 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a55 --all-stats --iterations=2 < %s | FileCheck %s
+
+ldr	w4, [x2], #4
+ldr	w5, [x3]
+madd	w0, w5, w4, w0
+add	x3, x3, x13
+subs	x1, x1, #1
+str	w0, [x21, x18, lsl #2]
+
+# CHECK:      Iterations:        2
+# CHECK-NEXT: Instructions:      12
+# CHECK-NEXT: Total Cycles:      21
+# CHECK-NEXT: Total uOps:        14
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    0.67
+# CHECK-NEXT: IPC:               0.57
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  2      3     1.00    *                   ldr	w4, [x2], #4
+# CHECK-NEXT:  1      3     1.00    *                   ldr	w5, [x3]
+# CHECK-NEXT:  1      4     1.00                        madd	w0, w5, w4, w0
+# CHECK-NEXT:  1      3     0.50                        add	x3, x3, x13
+# CHECK-NEXT:  1      3     0.50                        subs	x1, x1, #1
+# CHECK-NEXT:  1      4     1.00           *            str	w0, [x21, x18, lsl #2]
+
+# CHECK:      Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT     - Register unavailable:                      10  (47.6%)
+# CHECK-NEXT: RCU     - Retire tokens unavailable:                 0
+# CHECK-NEXT: SCHEDQ  - Scheduler full:                            0
+# CHECK-NEXT: LQ      - Load queue full:                           0
+# CHECK-NEXT: SQ      - Store queue full:                          0
+# CHECK-NEXT: GROUP   - Static restrictions on the dispatch group: 0
+
+# CHECK:      Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT:  0,              11  (52.4%)
+# CHECK-NEXT:  1,              6  (28.6%)
+# CHECK-NEXT:  2,              4  (19.0%)
+
+# CHECK:      Schedulers - number of cycles where we saw N micro opcodes issued:
+# CHECK-NEXT: [# issued], [# cycles]
+# CHECK-NEXT:  0,          11  (52.4%)
+# CHECK-NEXT:  1,          6  (28.6%)
+# CHECK-NEXT:  2,          4  (19.0%)
+
+# CHECK:      Scheduler's queue usage:
+# CHECK-NEXT: No scheduler resources used.
+
+# CHECK:      Retire Control Unit - number of cycles where we saw N instructions retired:
+# CHECK-NEXT: [# retired], [# cycles]
+# CHECK-NEXT:  0,           14  (66.7%)
+# CHECK-NEXT:  1,           4  (19.0%)
+# CHECK-NEXT:  2,           1  (4.8%)
+# CHECK-NEXT:  3,           2  (9.5%)
+
+# CHECK:      Total ROB Entries:                64
+# CHECK-NEXT: Max Used ROB Entries:             6  ( 9.4% )
+# CHECK-NEXT: Average Used ROB Entries per cy:  2  ( 3.1% )
+
+# CHECK:      Register File statistics:
+# CHECK-NEXT: Total number of mappings created:    14
+# CHECK-NEXT: Max number of mappings used:         6
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - CortexA55UnitALU
+# CHECK-NEXT: [0.1] - CortexA55UnitALU
+# CHECK-NEXT: [1]   - CortexA55UnitB
+# CHECK-NEXT: [2]   - CortexA55UnitDiv
+# CHECK-NEXT: [3.0] - CortexA55UnitFPALU
+# CHECK-NEXT: [3.1] - CortexA55UnitFPALU
+# CHECK-NEXT: [4]   - CortexA55UnitFPDIV
+# CHECK-NEXT: [5.0] - CortexA55UnitFPMAC
+# CHECK-NEXT: [5.1] - CortexA55UnitFPMAC
+# CHECK-NEXT: [6]   - CortexA55UnitLd
+# CHECK-NEXT: [7]   - CortexA55UnitMAC
+# CHECK-NEXT: [8]   - CortexA55UnitSt
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]
+# CHECK-NEXT: 1.00   1.00    -      -      -      -      -      -      -     2.00   1.00   1.00
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]    Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     ldr	w4, [x2], #4
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     ldr	w5, [x3]
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     madd	w0, w5, w4, w0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -     add	x3, x3, x13
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -     subs	x1, x1, #1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00   str	w0, [x21, x18, lsl #2]
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
@@ -0,0 +1,139 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a55 --all-views --iterations=2 < %s | FileCheck %s
+
+ldr	w4, [x2], #4
+ldr	w5, [x3]
+madd	w0, w5, w4, w0
+add	x3, x3, x13
+subs	x1, x1, #1
+str	w0, [x21, x18, lsl #2]
+
+# CHECK:      Iterations:        2
+# CHECK-NEXT: Instructions:      12
+# CHECK-NEXT: Total Cycles:      21
+# CHECK-NEXT: Total uOps:        14
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    0.67
+# CHECK-NEXT: IPC:               0.57
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK:      Cycles with backend pressure increase [ 19.05% ]
+# CHECK-NEXT: Throughput Bottlenecks:
+# CHECK-NEXT:   Resource Pressure       [ 0.00% ]
+# CHECK-NEXT:   Data Dependencies:      [ 19.05% ]
+# CHECK-NEXT:   - Register Dependencies [ 19.05% ]
+# CHECK-NEXT:   - Memory Dependencies   [ 0.00% ]
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  2      3     1.00    *                   ldr	w4, [x2], #4
+# CHECK-NEXT:  1      3     1.00    *                   ldr	w5, [x3]
+# CHECK-NEXT:  1      4     1.00                        madd	w0, w5, w4, w0
+# CHECK-NEXT:  1      3     0.50                        add	x3, x3, x13
+# CHECK-NEXT:  1      3     0.50                        subs	x1, x1, #1
+# CHECK-NEXT:  1      4     1.00           *            str	w0, [x21, x18, lsl #2]
+
+# CHECK:      Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT     - Register unavailable:                      10  (47.6%)
+# CHECK-NEXT: RCU     - Retire tokens unavailable:                 0
+# CHECK-NEXT: SCHEDQ  - Scheduler full:                            0
+# CHECK-NEXT: LQ      - Load queue full:                           0
+# CHECK-NEXT: SQ      - Store queue full:                          0
+# CHECK-NEXT: GROUP   - Static restrictions on the dispatch group: 0
+
+# CHECK:      Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT:  0,              11  (52.4%)
+# CHECK-NEXT:  1,              6  (28.6%)
+# CHECK-NEXT:  2,              4  (19.0%)
+
+# CHECK:      Schedulers - number of cycles where we saw N micro opcodes issued:
+# CHECK-NEXT: [# issued], [# cycles]
+# CHECK-NEXT:  0,          11  (52.4%)
+# CHECK-NEXT:  1,          6  (28.6%)
+# CHECK-NEXT:  2,          4  (19.0%)
+
+# CHECK:      Scheduler's queue usage:
+# CHECK-NEXT: No scheduler resources used.
+
+# CHECK:      Retire Control Unit - number of cycles where we saw N instructions retired:
+# CHECK-NEXT: [# retired], [# cycles]
+# CHECK-NEXT:  0,           14  (66.7%)
+# CHECK-NEXT:  1,           4  (19.0%)
+# CHECK-NEXT:  2,           1  (4.8%)
+# CHECK-NEXT:  3,           2  (9.5%)
+
+# CHECK:      Total ROB Entries:                64
+# CHECK-NEXT: Max Used ROB Entries:             6  ( 9.4% )
+# CHECK-NEXT: Average Used ROB Entries per cy:  2  ( 3.1% )
+
+# CHECK:      Register File statistics:
+# CHECK-NEXT: Total number of mappings created:    14
+# CHECK-NEXT: Max number of mappings used:         6
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - CortexA55UnitALU
+# CHECK-NEXT: [0.1] - CortexA55UnitALU
+# CHECK-NEXT: [1]   - CortexA55UnitB
+# CHECK-NEXT: [2]   - CortexA55UnitDiv
+# CHECK-NEXT: [3.0] - CortexA55UnitFPALU
+# CHECK-NEXT: [3.1] - CortexA55UnitFPALU
+# CHECK-NEXT: [4]   - CortexA55UnitFPDIV
+# CHECK-NEXT: [5.0] - CortexA55UnitFPMAC
+# CHECK-NEXT: [5.1] - CortexA55UnitFPMAC
+# CHECK-NEXT: [6]   - CortexA55UnitLd
+# CHECK-NEXT: [7]   - CortexA55UnitMAC
+# CHECK-NEXT: [8]   - CortexA55UnitSt
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]
+# CHECK-NEXT: 1.00   1.00    -      -      -      -      -      -      -     2.00   1.00   1.00
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]    Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     ldr	w4, [x2], #4
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     ldr	w5, [x3]
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     madd	w0, w5, w4, w0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -     add	x3, x3, x13
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -     subs	x1, x1, #1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00   str	w0, [x21, x18, lsl #2]
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0123456789
+# CHECK-NEXT: Index     0123456789          0
+
+# CHECK:      [0,0]     DeeER.    .    .    .   ldr	w4, [x2], #4
+# CHECK-NEXT: [0,1]     .DeeER    .    .    .   ldr	w5, [x3]
+# CHECK-NEXT: [0,2]     .   DeeeER.    .    .   madd	w0, w5, w4, w0
+# CHECK-NEXT: [0,3]     .   DeeE-R.    .    .   add	x3, x3, x13
+# CHECK-NEXT: [0,4]     .    DeeER.    .    .   subs	x1, x1, #1
+# CHECK-NEXT: [0,5]     .    . DeeeER  .    .   str	w0, [x21, x18, lsl #2]
+# CHECK-NEXT: [1,0]     .    .  DeeER  .    .   ldr	w4, [x2], #4
+# CHECK-NEXT: [1,1]     .    .   DeeER .    .   ldr	w5, [x3]
+# CHECK-NEXT: [1,2]     .    .    . DeeeER  .   madd	w0, w5, w4, w0
+# CHECK-NEXT: [1,3]     .    .    . DeeE-R  .   add	x3, x3, x13
+# CHECK-NEXT: [1,4]     .    .    .  DeeER  .   subs	x1, x1, #1
+# CHECK-NEXT: [1,5]     .    .    .    DeeeER   str	w0, [x21, x18, lsl #2]
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     2     0.0    0.0    0.0       ldr	w4, [x2], #4
+# CHECK-NEXT: 1.     2     0.0    0.0    0.0       ldr	w5, [x3]
+# CHECK-NEXT: 2.     2     0.0    0.0    0.0       madd	w0, w5, w4, w0
+# CHECK-NEXT: 3.     2     0.0    0.0    1.0       add	x3, x3, x13
+# CHECK-NEXT: 4.     2     0.0    0.0    0.0       subs	x1, x1, #1
+# CHECK-NEXT: 5.     2     0.0    0.0    0.0       str	w0, [x21, x18, lsl #2]
+# CHECK-NEXT:        2     0.0    0.0    0.2       <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
@@ -0,0 +1,135 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a55 --all-stats --all-views --iterations=2 < %s | FileCheck %s
+
+sdiv	w12, w21, w0
+add	w8, w8, #1
+add	w1, w2, w0
+add	w3, w4, #1
+add	w5, w6, w0
+add	w7, w9, w0
+
+# CHECK:      Iterations:        2
+# CHECK-NEXT: Instructions:      12
+# CHECK-NEXT: Total Cycles:      18
+# CHECK-NEXT: Total uOps:        12
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    0.67
+# CHECK-NEXT: IPC:               0.67
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK:      Cycles with backend pressure increase [ 27.78% ]
+# CHECK-NEXT: Throughput Bottlenecks:
+# CHECK-NEXT:   Resource Pressure       [ 27.78% ]
+# CHECK-NEXT:   Data Dependencies:      [ 0.00% ]
+# CHECK-NEXT:   - Register Dependencies [ 0.00% ]
+# CHECK-NEXT:   - Memory Dependencies   [ 0.00% ]
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      8     8.00                        sdiv	w12, w21, w0
+# CHECK-NEXT:  1      3     0.50                        add	w8, w8, #1
+# CHECK-NEXT:  1      3     0.50                        add	w1, w2, w0
+# CHECK-NEXT:  1      3     0.50                        add	w3, w4, #1
+# CHECK-NEXT:  1      3     0.50                        add	w5, w6, w0
+# CHECK-NEXT:  1      3     0.50                        add	w7, w9, w0
+
+# CHECK:      Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT     - Register unavailable:                      0
+# CHECK-NEXT: RCU     - Retire tokens unavailable:                 0
+# CHECK-NEXT: SCHEDQ  - Scheduler full:                            0
+# CHECK-NEXT: LQ      - Load queue full:                           0
+# CHECK-NEXT: SQ      - Store queue full:                          0
+# CHECK-NEXT: GROUP   - Static restrictions on the dispatch group: 5  (27.8%)
+
+# CHECK:      Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT:  0,              12  (66.7%)
+# CHECK-NEXT:  2,              6  (33.3%)
+
+# CHECK:      Schedulers - number of cycles where we saw N micro opcodes issued:
+# CHECK-NEXT: [# issued], [# cycles]
+# CHECK-NEXT:  0,          12  (66.7%)
+# CHECK-NEXT:  2,          6  (33.3%)
+
+# CHECK:      Scheduler's queue usage:
+# CHECK-NEXT: No scheduler resources used.
+
+# CHECK:      Retire Control Unit - number of cycles where we saw N instructions retired:
+# CHECK-NEXT: [# retired], [# cycles]
+# CHECK-NEXT:  0,           16  (88.9%)
+# CHECK-NEXT:  6,           2  (11.1%)
+
+# CHECK:      Total ROB Entries:                64
+# CHECK-NEXT: Max Used ROB Entries:             8  ( 12.5% )
+# CHECK-NEXT: Average Used ROB Entries per cy:  5  ( 7.8% )
+
+# CHECK:      Register File statistics:
+# CHECK-NEXT: Total number of mappings created:    12
+# CHECK-NEXT: Max number of mappings used:         8
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - CortexA55UnitALU
+# CHECK-NEXT: [0.1] - CortexA55UnitALU
+# CHECK-NEXT: [1]   - CortexA55UnitB
+# CHECK-NEXT: [2]   - CortexA55UnitDiv
+# CHECK-NEXT: [3.0] - CortexA55UnitFPALU
+# CHECK-NEXT: [3.1] - CortexA55UnitFPALU
+# CHECK-NEXT: [4]   - CortexA55UnitFPDIV
+# CHECK-NEXT: [5.0] - CortexA55UnitFPMAC
+# CHECK-NEXT: [5.1] - CortexA55UnitFPMAC
+# CHECK-NEXT: [6]   - CortexA55UnitLd
+# CHECK-NEXT: [7]   - CortexA55UnitMAC
+# CHECK-NEXT: [8]   - CortexA55UnitSt
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]
+# CHECK-NEXT: 2.50   2.50    -     8.00    -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]    Instructions:
+# CHECK-NEXT:  -      -      -     8.00    -      -      -      -      -      -      -      -     sdiv	w12, w21, w0
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     add	w8, w8, #1
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     add	w1, w2, w0
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     add	w3, w4, #1
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     add	w5, w6, w0
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     add	w7, w9, w0
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     01234567
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeeeeeeER.    . .   sdiv	w12, w21, w0
+# CHECK-NEXT: [0,1]     DeeE-----R.    . .   add	w8, w8, #1
+# CHECK-NEXT: [0,2]     .DeeE----R.    . .   add	w1, w2, w0
+# CHECK-NEXT: [0,3]     .DeeE----R.    . .   add	w3, w4, #1
+# CHECK-NEXT: [0,4]     . DeeE---R.    . .   add	w5, w6, w0
+# CHECK-NEXT: [0,5]     . DeeE---R.    . .   add	w7, w9, w0
+# CHECK-NEXT: [1,0]     .    .  DeeeeeeeER   sdiv	w12, w21, w0
+# CHECK-NEXT: [1,1]     .    .  DeeE-----R   add	w8, w8, #1
+# CHECK-NEXT: [1,2]     .    .   DeeE----R   add	w1, w2, w0
+# CHECK-NEXT: [1,3]     .    .   DeeE----R   add	w3, w4, #1
+# CHECK-NEXT: [1,4]     .    .    DeeE---R   add	w5, w6, w0
+# CHECK-NEXT: [1,5]     .    .    DeeE---R   add	w7, w9, w0
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     2     0.0    0.0    0.0       sdiv	w12, w21, w0
+# CHECK-NEXT: 1.     2     0.0    0.0    5.0       add	w8, w8, #1
+# CHECK-NEXT: 2.     2     0.0    0.0    4.0       add	w1, w2, w0
+# CHECK-NEXT: 3.     2     0.0    0.0    4.0       add	w3, w4, #1
+# CHECK-NEXT: 4.     2     0.0    0.0    3.0       add	w5, w6, w0
+# CHECK-NEXT: 5.     2     0.0    0.0    3.0       add	w7, w9, w0
+# CHECK-NEXT:        2     0.0    0.0    3.2       <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s
@@ -0,0 +1,136 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a55 --all-stats --all-views --iterations=2 < %s | FileCheck %s
+
+fdiv	s1, s2, s3
+add	w8, w8, #1
+add	w1, w2, w0
+add	w3, w4, #1
+add	w5, w6, w0
+add	w7, w9, w0
+
+# CHECK:      Iterations:        2
+# CHECK-NEXT: Instructions:      12
+# CHECK-NEXT: Total Cycles:      25
+# CHECK-NEXT: Total uOps:        12
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    0.48
+# CHECK-NEXT: IPC:               0.48
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK:      Cycles with backend pressure increase [ 28.00% ]
+# CHECK-NEXT: Throughput Bottlenecks:
+# CHECK-NEXT:   Resource Pressure       [ 28.00% ]
+# CHECK-NEXT:   Data Dependencies:      [ 0.00% ]
+# CHECK-NEXT:   - Register Dependencies [ 0.00% ]
+# CHECK-NEXT:   - Memory Dependencies   [ 0.00% ]
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      13    10.00                       fdiv	s1, s2, s3
+# CHECK-NEXT:  1      3     0.50                        add	w8, w8, #1
+# CHECK-NEXT:  1      3     0.50                        add	w1, w2, w0
+# CHECK-NEXT:  1      3     0.50                        add	w3, w4, #1
+# CHECK-NEXT:  1      3     0.50                        add	w5, w6, w0
+# CHECK-NEXT:  1      3     0.50                        add	w7, w9, w0
+
+# CHECK:      Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT     - Register unavailable:                      0
+# CHECK-NEXT: RCU     - Retire tokens unavailable:                 0
+# CHECK-NEXT: SCHEDQ  - Scheduler full:                            0
+# CHECK-NEXT: LQ      - Load queue full:                           0
+# CHECK-NEXT: SQ      - Store queue full:                          0
+# CHECK-NEXT: GROUP   - Static restrictions on the dispatch group: 7  (28.0%)
+
+# CHECK:      Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT:  0,              19  (76.0%)
+# CHECK-NEXT:  2,              6  (24.0%)
+
+# CHECK:      Schedulers - number of cycles where we saw N micro opcodes issued:
+# CHECK-NEXT: [# issued], [# cycles]
+# CHECK-NEXT:  0,          19  (76.0%)
+# CHECK-NEXT:  2,          6  (24.0%)
+
+# CHECK:      Scheduler's queue usage:
+# CHECK-NEXT: No scheduler resources used.
+
+# CHECK:      Retire Control Unit - number of cycles where we saw N instructions retired:
+# CHECK-NEXT: [# retired], [# cycles]
+# CHECK-NEXT:  0,           18  (72.0%)
+# CHECK-NEXT:  1,           2  (8.0%)
+# CHECK-NEXT:  2,           5  (20.0%)
+
+# CHECK:      Total ROB Entries:                64
+# CHECK-NEXT: Max Used ROB Entries:             7  ( 10.9% )
+# CHECK-NEXT: Average Used ROB Entries per cy:  2  ( 3.1% )
+
+# CHECK:      Register File statistics:
+# CHECK-NEXT: Total number of mappings created:    12
+# CHECK-NEXT: Max number of mappings used:         7
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - CortexA55UnitALU
+# CHECK-NEXT: [0.1] - CortexA55UnitALU
+# CHECK-NEXT: [1]   - CortexA55UnitB
+# CHECK-NEXT: [2]   - CortexA55UnitDiv
+# CHECK-NEXT: [3.0] - CortexA55UnitFPALU
+# CHECK-NEXT: [3.1] - CortexA55UnitFPALU
+# CHECK-NEXT: [4]   - CortexA55UnitFPDIV
+# CHECK-NEXT: [5.0] - CortexA55UnitFPMAC
+# CHECK-NEXT: [5.1] - CortexA55UnitFPMAC
+# CHECK-NEXT: [6]   - CortexA55UnitLd
+# CHECK-NEXT: [7]   - CortexA55UnitMAC
+# CHECK-NEXT: [8]   - CortexA55UnitSt
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]
+# CHECK-NEXT: 2.50   2.50    -      -      -      -     10.00   -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]    Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -     10.00   -      -      -      -      -     fdiv	s1, s2, s3
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     add	w8, w8, #1
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     add	w1, w2, w0
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     add	w3, w4, #1
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     add	w5, w6, w0
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     add	w7, w9, w0
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0123456789
+# CHECK-NEXT: Index     0123456789          01234
+
+# CHECK:      [0,0]     DeeeeeeeeeeeeER.    .   .   fdiv	s1, s2, s3
+# CHECK-NEXT: [0,1]     DeeER.    .    .    .   .   add	w8, w8, #1
+# CHECK-NEXT: [0,2]     .DeeER    .    .    .   .   add	w1, w2, w0
+# CHECK-NEXT: [0,3]     .DeeER    .    .    .   .   add	w3, w4, #1
+# CHECK-NEXT: [0,4]     . DeeER   .    .    .   .   add	w5, w6, w0
+# CHECK-NEXT: [0,5]     . DeeER   .    .    .   .   add	w7, w9, w0
+# CHECK-NEXT: [1,0]     .    .    DeeeeeeeeeeeeER   fdiv	s1, s2, s3
+# CHECK-NEXT: [1,1]     .    .    DeeER.    .   .   add	w8, w8, #1
+# CHECK-NEXT: [1,2]     .    .    .DeeER    .   .   add	w1, w2, w0
+# CHECK-NEXT: [1,3]     .    .    .DeeER    .   .   add	w3, w4, #1
+# CHECK-NEXT: [1,4]     .    .    . DeeER   .   .   add	w5, w6, w0
+# CHECK-NEXT: [1,5]     .    .    . DeeER   .   .   add	w7, w9, w0
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     2     0.0    0.0    0.0       fdiv	s1, s2, s3
+# CHECK-NEXT: 1.     2     0.0    0.0    0.0       add	w8, w8, #1
+# CHECK-NEXT: 2.     2     0.0    0.0    0.0       add	w1, w2, w0
+# CHECK-NEXT: 3.     2     0.0    0.0    0.0       add	w3, w4, #1
+# CHECK-NEXT: 4.     2     0.0    0.0    0.0       add	w5, w6, w0
+# CHECK-NEXT: 5.     2     0.0    0.0    0.0       add	w7, w9, w0
+# CHECK-NEXT:        2     0.0    0.0    0.0       <total>
diff --git a/llvm/test/tools/llvm-mca/X86/in-order-cpu.s b/llvm/test/tools/llvm-mca/X86/in-order-cpu.s
--- a/llvm/test/tools/llvm-mca/X86/in-order-cpu.s
+++ b/llvm/test/tools/llvm-mca/X86/in-order-cpu.s
@@ -1,3 +1,3 @@
-# RUN: not llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=atom -o /dev/null 2>&1 | FileCheck %s
-
-# CHECK: error: please specify an out-of-order cpu. 'atom' is an in-order cpu.
+# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=atom -o /dev/null 2>&1 | FileCheck %s
+# CHECK: warning: support for in-order CPU 'atom' is experimental.
+movsbw	%al, %di
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -335,9 +335,8 @@
     return 1;
 
   if (!PrintInstructionTables && !STI->getSchedModel().isOutOfOrder()) {
-    WithColor::error() << "please specify an out-of-order cpu. '" << MCPU
-                       << "' is an in-order cpu.\n";
-    return 1;
+    WithColor::warning() << "support for in-order CPU '" << MCPU
+                         << "' is experimental.\n";
   }
 
   if (!STI->getSchedModel().hasInstrSchedModel()) {
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -999,6 +999,7 @@
     SCDesc.NumMicroOps = 0;
     SCDesc.BeginGroup = false;
     SCDesc.EndGroup = false;
+    SCDesc.RetireOOO = false;
     SCDesc.WriteProcResIdx = 0;
     SCDesc.WriteLatencyIdx = 0;
     SCDesc.ReadAdvanceIdx = 0;
@@ -1101,6 +1102,7 @@
         SCDesc.EndGroup |= WriteRes->getValueAsBit("EndGroup");
         SCDesc.BeginGroup |= WriteRes->getValueAsBit("SingleIssue");
         SCDesc.EndGroup |= WriteRes->getValueAsBit("SingleIssue");
+        SCDesc.RetireOOO |= WriteRes->getValueAsBit("RetireOOO");
 
         // Create an entry for each ProcResource listed in WriteRes.
         RecVec PRVec = WriteRes->getValueAsListOfDefs("ProcResources");
@@ -1299,7 +1301,7 @@
     std::vector<MCSchedClassDesc> &SCTab =
       SchedTables.ProcSchedClasses[1 + (PI - SchedModels.procModelBegin())];
 
-    OS << "\n// {Name, NumMicroOps, BeginGroup, EndGroup,"
+    OS << "\n// {Name, NumMicroOps, BeginGroup, EndGroup, RetireOOO,"
        << " WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#}\n";
     OS << "static const llvm::MCSchedClassDesc "
        << PI->ModelName << "SchedClasses[] = {\n";
@@ -1310,7 +1312,7 @@
            && "invalid class not first");
     OS << "  {DBGFIELD(\"InvalidSchedClass\")  "
        << MCSchedClassDesc::InvalidNumMicroOps
-       << ", false, false,  0, 0,  0, 0,  0, 0},\n";
+       << ", false, false, false, 0, 0,  0, 0,  0, 0},\n";
 
     for (unsigned SCIdx = 1, SCEnd = SCTab.size(); SCIdx != SCEnd; ++SCIdx) {
       MCSchedClassDesc &MCDesc = SCTab[SCIdx];
@@ -1321,6 +1323,7 @@
       OS << MCDesc.NumMicroOps
          << ", " << ( MCDesc.BeginGroup ? "true" : "false" )
          << ", " << ( MCDesc.EndGroup ? "true" : "false" )
+         << ", " << ( MCDesc.RetireOOO ? "true" : "false" )
          << ", " << format("%2d", MCDesc.WriteProcResIdx)
          << ", " << MCDesc.NumWriteProcResEntries
          << ", " << format("%2d", MCDesc.WriteLatencyIdx)