diff --git a/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h b/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
--- a/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
@@ -172,11 +172,6 @@
   void freePhysRegs(const RegisterRenamingInfo &Entry,
                     MutableArrayRef<unsigned> FreedPhysRegs);
 
-  // Collects writes that are in a RAW dependency with RS.
-  // This method is called from `addRegisterRead()`.
-  void collectWrites(const ReadState &RS,
-                     SmallVectorImpl<WriteRef> &Writes) const;
-
   // Create an instance of RegisterMappingTracker for every register file
   // specified by the processor model.
   // If no register file is specified, then this method creates a default
@@ -187,6 +182,10 @@
   RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
                unsigned NumRegs = 0);
 
+  // Collects, into Writes, the writes that are in a RAW dependency with RS.
+  void collectWrites(const ReadState &RS,
+                     SmallVectorImpl<WriteRef> &Writes) const;
+
   // This method updates the register mappings inserting a new register
   // definition. This method is also responsible for updating the number of
   // allocated physical registers in each register file modified by the write.
diff --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
@@ -0,0 +1,83 @@
+//===---------------------- InOrderIssueStage.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// InOrderIssueStage implements an in-order execution pipeline.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_IN_ORDER_ISSUE_STAGE_H
+#define LLVM_MCA_IN_ORDER_ISSUE_STAGE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/SourceMgr.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+#include <queue>
+
+namespace llvm {
+struct MCSchedModel;
+class MCSubtargetInfo;
+
+namespace mca {
+class RegisterFile;
+class ResourceManager;
+
+class InOrderIssueStage final : public Stage {
+  const MCSchedModel &SM;
+  const MCSubtargetInfo &STI;
+  RegisterFile &PRF;
+  std::unique_ptr<ResourceManager> RM;
+
+  /// Instructions that were issued, but not retired yet. Executed instructions
+  /// (with CyclesLeft == 0) are moved to the end of IssuedInst.
+  SmallVector<InstRef, 4> IssuedInst;
+  /// Number of executed instructions at the end of IssuedInst.
+  int NumExecuted;
+
+  /// Instructions that must be issued during the next cycle. If the front
+  /// instruction cannot execute due to an unmet register or resource
+  /// dependency, the whole queue is stalled for StallCyclesLeft cycles.
+  std::queue<InstRef> InstQueue;
+  int StallCyclesLeft;
+
+  /// Number of instructions that can be added to InstQueue (dispatched) during
+  /// this cycle.
+  int Bandwidth;
+
+  InOrderIssueStage(const InOrderIssueStage &Other) = delete;
+  InOrderIssueStage &operator=(const InOrderIssueStage &Other) = delete;
+
+  /// Returns false if IR has an unmet register or resource dependency. On
+  /// return, StallCycles is the number of cycles left before the instruction
+  /// can be issued (zero if it can be issued now).
+  bool canExecute(const InstRef &IR, int *StallCycles) const;
+
+  /// Issue the instruction, or update StallCycles if IR is stalled.
+  Error tryIssue(InstRef &IR, int *StallCycles);
+
+  /// Update status of instructions from IssuedInst.
+  Error updateIssuedInst();
+
+public:
+  InOrderIssueStage(RegisterFile &PRF, const MCSchedModel &SM,
+                    const MCSubtargetInfo &STI)
+      : SM(SM), STI(STI), PRF(PRF), RM(std::make_unique<ResourceManager>(SM)),
+        NumExecuted(0), StallCyclesLeft(0), Bandwidth(0) {}
+
+  bool isAvailable(const InstRef &) const override;
+  bool hasWorkToComplete() const override;
+  Error execute(InstRef &IR) override;
+  Error cycleStart() override;
+  Error cycleEnd() override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_IN_ORDER_ISSUE_STAGE_H
diff --git a/llvm/include/llvm/MCA/Stages/RetireStage.h b/llvm/include/llvm/MCA/Stages/RetireStage.h
--- a/llvm/include/llvm/MCA/Stages/RetireStage.h
+++ b/llvm/include/llvm/MCA/Stages/RetireStage.h
@@ -26,7 +26,7 @@
 
 class RetireStage final : public Stage {
   // Owner will go away when we move listeners/eventing to the stages.
-  RetireControlUnit &RCU;
+  RetireControlUnit *RCU; // Null for in-order pipelines (no RCU).
   RegisterFile &PRF;
   LSUnitBase &LSU;
 
@@ -34,10 +34,10 @@
   RetireStage &operator=(const RetireStage &Other) = delete;
 
 public:
-  RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS)
+  RetireStage(RetireControlUnit *R, RegisterFile &F, LSUnitBase &LS)
+      : Stage(), RCU(R), PRF(F), LSU(LS) {} // R may be null.
 
-  bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
+  bool hasWorkToComplete() const override { return !(!RCU || RCU->isEmpty()); }
   Error cycleStart() override;
   Error execute(InstRef &IR) override;
   void notifyInstructionRetired(const InstRef &IR) const;
diff --git a/llvm/lib/MCA/CMakeLists.txt b/llvm/lib/MCA/CMakeLists.txt
--- a/llvm/lib/MCA/CMakeLists.txt
+++ b/llvm/lib/MCA/CMakeLists.txt
@@ -17,6 +17,7 @@
   Stages/InstructionTables.cpp
   Stages/MicroOpQueueStage.cpp
   Stages/RetireStage.cpp
+  Stages/InOrderIssueStage.cpp
   Stages/Stage.cpp
   Support.cpp
 
diff --git a/llvm/lib/MCA/Context.cpp b/llvm/lib/MCA/Context.cpp
--- a/llvm/lib/MCA/Context.cpp
+++ b/llvm/lib/MCA/Context.cpp
@@ -21,6 +21,7 @@
 #include "llvm/MCA/Stages/DispatchStage.h"
 #include "llvm/MCA/Stages/EntryStage.h"
 #include "llvm/MCA/Stages/ExecuteStage.h"
+#include "llvm/MCA/Stages/InOrderIssueStage.h"
 #include "llvm/MCA/Stages/MicroOpQueueStage.h"
 #include "llvm/MCA/Stages/RetireStage.h"
 
@@ -32,19 +33,35 @@
   const MCSchedModel &SM = STI.getSchedModel();
 
   // Create the hardware units defining the backend.
-  auto RCU = std::make_unique<RetireControlUnit>(SM);
+  std::unique_ptr<RetireControlUnit> RCU;
+  if (SM.isOutOfOrder())
+    RCU = std::make_unique<RetireControlUnit>(SM);
+
   auto PRF = std::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
   auto LSU = std::make_unique<LSUnit>(SM, Opts.LoadQueueSize,
                                        Opts.StoreQueueSize, Opts.AssumeNoAlias);
   auto HWS = std::make_unique<Scheduler>(SM, *LSU);
 
-  // Create the pipeline stages.
-  auto Fetch = std::make_unique<EntryStage>(SrcMgr);
-  auto Dispatch = std::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth,
-                                                   *RCU, *PRF);
-  auto Execute =
-      std::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis);
-  auto Retire = std::make_unique<RetireStage>(*RCU, *PRF, *LSU);
+  // Build the pipeline.
+  auto StagePipeline = std::make_unique<Pipeline>();
+  StagePipeline->appendStage(std::make_unique<EntryStage>(SrcMgr));
+  if (SM.isOutOfOrder()) {
+    // Out-of-order pipeline
+    if (Opts.MicroOpQueueSize) {
+      StagePipeline->appendStage(std::make_unique<MicroOpQueueStage>(
+          Opts.MicroOpQueueSize, Opts.DecodersThroughput));
+    }
+    StagePipeline->appendStage(std::make_unique<DispatchStage>(
+        STI, MRI, Opts.DispatchWidth, *RCU, *PRF));
+    StagePipeline->appendStage(
+        std::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis));
+  } else {
+    // In-order pipeline
+    StagePipeline->appendStage(
+        std::make_unique<InOrderIssueStage>(*PRF, SM, STI));
+  }
+  StagePipeline->appendStage(
+      std::make_unique<RetireStage>(RCU.get(), *PRF, *LSU));
 
   // Pass the ownership of all the hardware units to this Context.
   addHardwareUnit(std::move(RCU));
@@ -52,15 +69,6 @@
   addHardwareUnit(std::move(LSU));
   addHardwareUnit(std::move(HWS));
 
-  // Build the pipeline.
-  auto StagePipeline = std::make_unique<Pipeline>();
-  StagePipeline->appendStage(std::move(Fetch));
-  if (Opts.MicroOpQueueSize)
-    StagePipeline->appendStage(std::make_unique<MicroOpQueueStage>(
-        Opts.MicroOpQueueSize, Opts.DecodersThroughput));
-  StagePipeline->appendStage(std::move(Dispatch));
-  StagePipeline->appendStage(std::move(Execute));
-  StagePipeline->appendStage(std::move(Retire));
   return StagePipeline;
 }
 
diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
@@ -0,0 +1,269 @@
+//===---------------------- InOrderIssueStage.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// InOrderIssueStage implements an in-order execution pipeline.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/InOrderIssueStage.h"
+
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/ResourceManager.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+
+#include <algorithm>
+
+#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
+namespace mca {
+
+bool InOrderIssueStage::hasWorkToComplete() const {
+  return !(IssuedInst.empty() && InstQueue.empty());
+}
+
+bool InOrderIssueStage::isAvailable(const InstRef &IR) const {
+  return Bandwidth > 0; // Set by cycleStart(); decremented by execute().
+}
+
+/// Return true if RM.checkAvailability() reports a hazard (non-zero) for IR.
+static bool hasResourceHazard(const ResourceManager &RM, const InstRef &IR) {
+  if (!RM.checkAvailability(IR.getInstruction()->getDesc()))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "[E] Stall #" << IR << '\n');
+  return true;
+}
+
+/// Return the number of cycles left until the register dependencies of IR
+/// are met, or zero if IR has no pending register hazard.
+static int checkRegisterHazard(const RegisterFile &PRF, const MCSchedModel &SM,
+                               const MCSubtargetInfo &STI, const InstRef &IR) {
+  int StallCycles = 0;
+  SmallVector<WriteRef, 4> Writes;
+
+  for (const ReadState &RS : IR.getInstruction()->getUses()) {
+    const ReadDescriptor &RD = RS.getDescriptor();
+    const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
+
+    PRF.collectWrites(RS, Writes);
+    for (const WriteRef &WR : Writes) {
+      const WriteState *WS = WR.getWriteState();
+      unsigned WriteResID = WS->getWriteResourceID();
+      int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
+      LLVM_DEBUG(dbgs() << "[E] ReadAdvance for #" << IR << ": " << ReadAdvance
+                        << '\n');
+
+      assert(WS->getCyclesLeft() != UNKNOWN_CYCLES && "Unknown write latency!");
+      int CyclesLeft = static_cast<int>(WS->getCyclesLeft());
+      if (CyclesLeft > ReadAdvance) {
+        LLVM_DEBUG(dbgs() << "[E] Register hazard: " << WS->getRegisterID()
+                          << '\n');
+        StallCycles = std::max(StallCycles, CyclesLeft - ReadAdvance);
+      }
+    }
+    Writes.clear();
+  }
+
+  return StallCycles;
+}
+
+bool InOrderIssueStage::canExecute(const InstRef &IR, int *StallCycles) const {
+  // Register hazards are checked first; resources only if none is found.
+  const int RegStall = checkRegisterHazard(PRF, SM, STI, IR);
+  *StallCycles = RegStall;
+  if (RegStall != 0) {
+    // FIXME: add a parameter to HWStallEvent to indicate a number of cycles.
+    for (int Cycle = 0; Cycle < RegStall; ++Cycle) {
+      notifyEvent<HWStallEvent>(
+          HWStallEvent(HWStallEvent::RegisterFileStall, IR));
+      notifyEvent<HWPressureEvent>(
+          HWPressureEvent(HWPressureEvent::REGISTER_DEPS, IR));
+    }
+  } else if (hasResourceHazard(*RM, IR)) {
+    *StallCycles = 1;
+    notifyEvent<HWStallEvent>(
+        HWStallEvent(HWStallEvent::DispatchGroupStall, IR));
+    notifyEvent<HWPressureEvent>(
+        HWPressureEvent(HWPressureEvent::RESOURCES, IR));
+  }
+
+  return *StallCycles == 0;
+}
+
+static void addRegisterReadWrite(RegisterFile &PRF, Instruction &IS,
+                                 unsigned SourceIndex,
+                                 const MCSubtargetInfo &STI,
+                                 SmallVectorImpl<unsigned> &UsedRegs) {
+  // Nothing to update for an eliminated instruction.
+  if (IS.isEliminated())
+    return;
+
+  for (ReadState &Use : IS.getUses())
+    PRF.addRegisterRead(Use, STI);
+  for (WriteState &Def : IS.getDefs())
+    PRF.addRegisterWrite(WriteRef(SourceIndex, &Def), UsedRegs);
+}
+
+static void notifyInstructionExecute(
+    const InstRef &IR,
+    const SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedRes,
+    const Stage &S) {
+  // Emit a Ready event followed by an Issued event (carrying UsedRes) for IR.
+  S.notifyEvent<HWInstructionEvent>(
+      HWInstructionEvent(HWInstructionEvent::Ready, IR));
+  S.notifyEvent<HWInstructionEvent>(HWInstructionIssuedEvent(IR, UsedRes));
+
+  LLVM_DEBUG(dbgs() << "[E] Issued #" << IR << "\n");
+}
+
+static void notifyInstructionDispatch(const InstRef &IR, unsigned Ops,
+                                      const SmallVectorImpl<unsigned> &UsedRegs,
+                                      const Stage &S) {
+  // Emit a Dispatched event for IR carrying UsedRegs and Ops.
+  S.notifyEvent<HWInstructionEvent>(
+      HWInstructionDispatchedEvent(IR, UsedRegs, Ops));
+
+  LLVM_DEBUG(dbgs() << "[E] Dispatched #" << IR << "\n");
+}
+
+llvm::Error InOrderIssueStage::execute(InstRef &IR) {
+  Instruction &IS = *IR.getInstruction();
+  // NOTE(review): 0 is presumably a dummy RCU token id (no RCU here); confirm.
+  IS.dispatch(0);
+
+  SmallVector<unsigned, 8> UsedRegs(PRF.getNumRegisterFiles(), 0U);
+  // Register file information is not available yet. Send a dispatch
+  // event with "zero" uops now, and emit a proper event later.
+  notifyInstructionDispatch(IR, /*Ops=*/0, UsedRegs, *this);
+  // Use one dispatch slot and queue IR; cycleStart() issues queued entries.
+  --Bandwidth;
+  InstQueue.push(IR);
+
+  return llvm::ErrorSuccess();
+}
+
+llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, int *StallCycles) {
+  Instruction &IS = *IR.getInstruction();
+  unsigned SourceIndex = IR.getSourceIndex();
+  // If IR is stalled, report success and leave the count in *StallCycles.
+  if (!canExecute(IR, StallCycles)) {
+    LLVM_DEBUG(dbgs() << "[E] Stalled #" << IR << " for " << *StallCycles
+                      << " cycles\n");
+    return llvm::ErrorSuccess();
+  }
+
+  SmallVector<unsigned, 4> UsedRegs(PRF.getNumRegisterFiles());
+  addRegisterReadWrite(PRF, IS, SourceIndex, STI, UsedRegs);
+
+  // Notify dispatch the second time to update registers used by the
+  // instruction.
+  notifyInstructionDispatch(IR, IS.getDesc().NumMicroOps, UsedRegs, *this);
+
+  SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> UsedResources;
+  RM->issueInstruction(IS.getDesc(), UsedResources);
+  IS.execute(SourceIndex);
+
+  // Replace resource masks with valid resource processor IDs.
+  for (std::pair<ResourceRef, ResourceCycles> &Use : UsedResources) {
+    uint64_t Mask = Use.first.first;
+    Use.first.first = RM->resolveResourceMask(Mask);
+  }
+  notifyInstructionExecute(IR, UsedResources, *this);
+  // Append IR, then swap it in front of the already executed instructions.
+  IssuedInst.push_back(IR);
+  if (NumExecuted)
+    std::iter_swap(IssuedInst.end() - 1, IssuedInst.end() - NumExecuted - 1);
+
+  return llvm::ErrorSuccess();
+}
+
+llvm::Error InOrderIssueStage::updateIssuedInst() {
+  // Move the instructions executed on the previous cycle to the next stage.
+  if (NumExecuted) {
+    for (auto I = IssuedInst.end() - NumExecuted, E = IssuedInst.end(); I != E;
+         ++I) {
+      LLVM_DEBUG(dbgs() << "[E] Retiring #" << *I << " (out of " << NumExecuted
+                        << ")\n");
+      if (llvm::Error Err = moveToTheNextStage(*I))
+        return Err;
+    }
+    IssuedInst.resize(IssuedInst.size() - NumExecuted);
+    NumExecuted = 0;
+  }
+
+  // Update other instructions. Executed instructions will be retired during the
+  // next cycle.
+  for (auto I = IssuedInst.begin(), E = IssuedInst.end();
+       I != (E - NumExecuted);) {
+    InstRef &IR = *I;
+    Instruction &IS = *IR.getInstruction();
+
+    IS.cycleEvent();
+    if (!IS.isExecuted()) {
+      LLVM_DEBUG(dbgs() << "[E] Instruction #" << IR
+                        << " is still executing\n");
+      ++I;
+      continue;
+    }
+    notifyEvent<HWInstructionEvent>(
+        HWInstructionEvent(HWInstructionEvent::Executed, IR));
+    // Swap IR into the executed tail. I is not advanced: it holds a new entry.
+    LLVM_DEBUG(dbgs() << "[E] Instruction #" << IR << " is executed\n");
+    ++NumExecuted;
+    std::iter_swap(I, E - NumExecuted);
+  }
+
+  return llvm::ErrorSuccess();
+}
+
+llvm::Error InOrderIssueStage::cycleStart() {
+  // Let the resource manager release the resources consumed so far.
+  SmallVector<ResourceRef, 4> FreedResources;
+  RM->cycleEvent(FreedResources);
+
+  if (llvm::Error Err = updateIssuedInst())
+    return Err;
+
+  // Issue queued instructions, in order, until one of them stalls.
+  while (StallCyclesLeft == 0 && !InstQueue.empty()) {
+    if (llvm::Error Err = tryIssue(InstQueue.front(), &StallCyclesLeft))
+      return Err;
+    // The front instruction was issued: remove it from the queue.
+    if (StallCyclesLeft == 0)
+      InstQueue.pop();
+  }
+
+  // Compute how many instructions can be dispatched during this cycle.
+  const int IssueWidth = static_cast<int>(SM.IssueWidth);
+  if (StallCyclesLeft == 0) {
+    Bandwidth = IssueWidth;
+  } else if (StallCyclesLeft == 1) {
+    // The stalled instruction will be ready during the next cycle. Add more
+    // instructions if allowed.
+    Bandwidth = IssueWidth - static_cast<int>(InstQueue.size());
+    assert(Bandwidth >= 0 && "InstQueue overflow.");
+  } else {
+    Bandwidth = 0;
+  }
+
+  return llvm::Error::success();
+}
+
+llvm::Error InOrderIssueStage::cycleEnd() {
+  // One cycle of the pending stall (if any) has elapsed.
+  StallCyclesLeft -= StallCyclesLeft > 0 ? 1 : 0;
+  return llvm::Error::success();
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/llvm/lib/MCA/Stages/RetireStage.cpp b/llvm/lib/MCA/Stages/RetireStage.cpp
--- a/llvm/lib/MCA/Stages/RetireStage.cpp
+++ b/llvm/lib/MCA/Stages/RetireStage.cpp
@@ -23,19 +23,19 @@
 namespace mca {
 
 llvm::Error RetireStage::cycleStart() {
-  if (RCU.isEmpty())
+  if (!RCU || RCU->isEmpty())
     return llvm::ErrorSuccess();
 
-  const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle();
+  const unsigned MaxRetirePerCycle = RCU->getMaxRetirePerCycle();
   unsigned NumRetired = 0;
-  while (!RCU.isEmpty()) {
+  while (!RCU->isEmpty()) {
     if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle)
       break;
-    const RetireControlUnit::RUToken &Current = RCU.getCurrentToken();
+    const RetireControlUnit::RUToken &Current = RCU->getCurrentToken();
     if (!Current.Executed)
       break;
     notifyInstructionRetired(Current.IR);
-    RCU.consumeCurrentToken();
+    RCU->consumeCurrentToken();
     NumRetired++;
   }
 
@@ -43,7 +43,15 @@
 }
 
 llvm::Error RetireStage::execute(InstRef &IR) {
-  RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID());
+  Instruction &IS = *IR.getInstruction();
+  if (RCU) {
+    RCU->onInstructionExecuted(IS.getRCUTokenID());
+    return llvm::ErrorSuccess();
+  }
+  // Without a retire control unit, IR is retired as soon as it gets here.
+  IS.retire();
+  notifyInstructionRetired(IR);
+
   return llvm::ErrorSuccess();
 }
 
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s
@@ -0,0 +1,82 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a55 --timeline --iterations=2 < %s | FileCheck %s
+
+add      w2, w3, #1
+add      w4, w3, #2, lsl #12
+add      w0, w4, #3
+add      w1, w0, #4
+
+# CHECK:      Iterations:        2
+# CHECK-NEXT: Instructions:      8
+# CHECK-NEXT: Total Cycles:      11
+# CHECK-NEXT: Total uOps:        8
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    0.73
+# CHECK-NEXT: IPC:               0.73
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      3     0.50                        add	w2, w3, #1
+# CHECK-NEXT:  1      3     0.50                        add	w4, w3, #2, lsl #12
+# CHECK-NEXT:  1      3     0.50                        add	w0, w4, #3
+# CHECK-NEXT:  1      3     0.50                        add	w1, w0, #4
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - CortexA55UnitALU
+# CHECK-NEXT: [0.1] - CortexA55UnitALU
+# CHECK-NEXT: [1]   - CortexA55UnitB
+# CHECK-NEXT: [2]   - CortexA55UnitDiv
+# CHECK-NEXT: [3.0] - CortexA55UnitFPALU
+# CHECK-NEXT: [3.1] - CortexA55UnitFPALU
+# CHECK-NEXT: [4]   - CortexA55UnitFPDIV
+# CHECK-NEXT: [5.0] - CortexA55UnitFPMAC
+# CHECK-NEXT: [5.1] - CortexA55UnitFPMAC
+# CHECK-NEXT: [6]   - CortexA55UnitLd
+# CHECK-NEXT: [7]   - CortexA55UnitMAC
+# CHECK-NEXT: [8]   - CortexA55UnitSt
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]
+# CHECK-NEXT: 2.00   2.00    -      -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]    Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -     add	w2, w3, #1
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -     add	w4, w3, #2, lsl #12
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -     add	w0, w4, #3
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -     add	w1, w0, #4
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeeER    .   add	w2, w3, #1
+# CHECK-NEXT: [0,1]     DeeeER    .   add	w4, w3, #2, lsl #12
+# CHECK-NEXT: [0,2]     .DeeeER   .   add	w0, w4, #3
+# CHECK-NEXT: [0,3]     .D=eeeER  .   add	w1, w0, #4
+# CHECK-NEXT: [1,0]     . DeeeER  .   add	w2, w3, #1
+# CHECK-NEXT: [1,1]     .  DeeeER .   add	w4, w3, #2, lsl #12
+# CHECK-NEXT: [1,2]     .  D=eeeER.   add	w0, w4, #3
+# CHECK-NEXT: [1,3]     .   D=eeeER   add	w1, w0, #4
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     2     1.0    0.0    0.0       add	w2, w3, #1
+# CHECK-NEXT: 1.     2     1.0    0.0    0.0       add	w4, w3, #2, lsl #12
+# CHECK-NEXT: 2.     2     1.5    0.0    0.0       add	w0, w4, #3
+# CHECK-NEXT: 3.     2     2.0    0.0    0.0       add	w1, w0, #4
+# CHECK-NEXT:        2     1.4    0.0    0.0       <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
@@ -0,0 +1,101 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a55 --all-stats --iterations=2 < %s | FileCheck %s
+
+ldr	w4, [x2], #4
+ldr	w5, [x3]
+madd	w0, w5, w4, w0
+add	x3, x3, x13
+subs	x1, x1, #1
+str	w0, [x21, x18, lsl #2]
+
+# CHECK:      Iterations:        2
+# CHECK-NEXT: Instructions:      12
+# CHECK-NEXT: Total Cycles:      21
+# CHECK-NEXT: Total uOps:        14
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    0.67
+# CHECK-NEXT: IPC:               0.57
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  2      3     1.00    *                   ldr	w4, [x2], #4
+# CHECK-NEXT:  1      3     1.00    *                   ldr	w5, [x3]
+# CHECK-NEXT:  1      4     1.00                        madd	w0, w5, w4, w0
+# CHECK-NEXT:  1      3     0.50                        add	x3, x3, x13
+# CHECK-NEXT:  1      3     0.50                        subs	x1, x1, #1
+# CHECK-NEXT:  1      4     1.00           *            str	w0, [x21, x18, lsl #2]
+
+# CHECK:      Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT     - Register unavailable:                      10  (47.6%)
+# CHECK-NEXT: RCU     - Retire tokens unavailable:                 0
+# CHECK-NEXT: SCHEDQ  - Scheduler full:                            0
+# CHECK-NEXT: LQ      - Load queue full:                           0
+# CHECK-NEXT: SQ      - Store queue full:                          0
+# CHECK-NEXT: GROUP   - Static restrictions on the dispatch group: 1  (4.8%)
+
+# CHECK:      Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT:  0,              12  (57.1%)
+# CHECK-NEXT:  1,              5  (23.8%)
+# CHECK-NEXT:  2,              3  (14.3%)
+# CHECK-NEXT:  3,              1  (4.8%)
+
+# CHECK:      Schedulers - number of cycles where we saw N micro opcodes issued:
+# CHECK-NEXT: [# issued], [# cycles]
+# CHECK-NEXT:  0,          12  (57.1%)
+# CHECK-NEXT:  1,          5  (23.8%)
+# CHECK-NEXT:  2,          3  (14.3%)
+# CHECK-NEXT:  3,          1  (4.8%)
+
+# CHECK:      Scheduler's queue usage:
+# CHECK-NEXT: No scheduler resources used.
+
+# CHECK:      Retire Control Unit - number of cycles where we saw N instructions retired:
+# CHECK-NEXT: [# retired], [# cycles]
+# CHECK-NEXT:  0,           12  (57.1%)
+# CHECK-NEXT:  1,           6  (28.6%)
+# CHECK-NEXT:  2,           3  (14.3%)
+
+# CHECK:      Total ROB Entries:                0
+# CHECK-NEXT: Max Used ROB Entries:             6  ( inf% )
+# CHECK-NEXT: Average Used ROB Entries per cy:  2  ( inf% )
+
+# CHECK:      Register File statistics:
+# CHECK-NEXT: Total number of mappings created:    14
+# CHECK-NEXT: Max number of mappings used:         6
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - CortexA55UnitALU
+# CHECK-NEXT: [0.1] - CortexA55UnitALU
+# CHECK-NEXT: [1]   - CortexA55UnitB
+# CHECK-NEXT: [2]   - CortexA55UnitDiv
+# CHECK-NEXT: [3.0] - CortexA55UnitFPALU
+# CHECK-NEXT: [3.1] - CortexA55UnitFPALU
+# CHECK-NEXT: [4]   - CortexA55UnitFPDIV
+# CHECK-NEXT: [5.0] - CortexA55UnitFPMAC
+# CHECK-NEXT: [5.1] - CortexA55UnitFPMAC
+# CHECK-NEXT: [6]   - CortexA55UnitLd
+# CHECK-NEXT: [7]   - CortexA55UnitMAC
+# CHECK-NEXT: [8]   - CortexA55UnitSt
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]
+# CHECK-NEXT: 1.00   1.00    -      -      -      -      -      -      -     2.00   1.00   1.00
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]    Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     ldr	w4, [x2], #4
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     ldr	w5, [x3]
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     madd	w0, w5, w4, w0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -     add	x3, x3, x13
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -     subs	x1, x1, #1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00   str	w0, [x21, x18, lsl #2]
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
@@ -0,0 +1,140 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a55 --all-views --iterations=2 < %s | FileCheck %s
+
+ldr	w4, [x2], #4
+ldr	w5, [x3]
+madd	w0, w5, w4, w0
+add	x3, x3, x13
+subs	x1, x1, #1
+str	w0, [x21, x18, lsl #2]
+
+# CHECK:      Iterations:        2
+# CHECK-NEXT: Instructions:      12
+# CHECK-NEXT: Total Cycles:      21
+# CHECK-NEXT: Total uOps:        14
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    0.67
+# CHECK-NEXT: IPC:               0.57
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK:      Cycles with backend pressure increase [ 23.81% ]
+# CHECK-NEXT: Throughput Bottlenecks:
+# CHECK-NEXT:   Resource Pressure       [ 4.76% ]
+# CHECK-NEXT:   Data Dependencies:      [ 19.05% ]
+# CHECK-NEXT:   - Register Dependencies [ 19.05% ]
+# CHECK-NEXT:   - Memory Dependencies   [ 0.00% ]
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  2      3     1.00    *                   ldr	w4, [x2], #4
+# CHECK-NEXT:  1      3     1.00    *                   ldr	w5, [x3]
+# CHECK-NEXT:  1      4     1.00                        madd	w0, w5, w4, w0
+# CHECK-NEXT:  1      3     0.50                        add	x3, x3, x13
+# CHECK-NEXT:  1      3     0.50                        subs	x1, x1, #1
+# CHECK-NEXT:  1      4     1.00           *            str	w0, [x21, x18, lsl #2]
+
+# CHECK:      Dynamic Dispatch Stall Cycles:
+# CHECK-NEXT: RAT     - Register unavailable:                      10  (47.6%)
+# CHECK-NEXT: RCU     - Retire tokens unavailable:                 0
+# CHECK-NEXT: SCHEDQ  - Scheduler full:                            0
+# CHECK-NEXT: LQ      - Load queue full:                           0
+# CHECK-NEXT: SQ      - Store queue full:                          0
+# CHECK-NEXT: GROUP   - Static restrictions on the dispatch group: 1  (4.8%)
+
+# CHECK:      Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+# CHECK-NEXT: [# dispatched], [# cycles]
+# CHECK-NEXT:  0,              12  (57.1%)
+# CHECK-NEXT:  1,              5  (23.8%)
+# CHECK-NEXT:  2,              3  (14.3%)
+# CHECK-NEXT:  3,              1  (4.8%)
+
+# CHECK:      Schedulers - number of cycles where we saw N micro opcodes issued:
+# CHECK-NEXT: [# issued], [# cycles]
+# CHECK-NEXT:  0,          12  (57.1%)
+# CHECK-NEXT:  1,          5  (23.8%)
+# CHECK-NEXT:  2,          3  (14.3%)
+# CHECK-NEXT:  3,          1  (4.8%)
+
+# CHECK:      Scheduler's queue usage:
+# CHECK-NEXT: No scheduler resources used.
+
+# CHECK:      Retire Control Unit - number of cycles where we saw N instructions retired:
+# CHECK-NEXT: [# retired], [# cycles]
+# CHECK-NEXT:  0,           12  (57.1%)
+# CHECK-NEXT:  1,           6  (28.6%)
+# CHECK-NEXT:  2,           3  (14.3%)
+
+# CHECK:      Total ROB Entries:                0
+# CHECK-NEXT: Max Used ROB Entries:             6  ( inf% )
+# CHECK-NEXT: Average Used ROB Entries per cy:  2  ( inf% )
+
+# CHECK:      Register File statistics:
+# CHECK-NEXT: Total number of mappings created:    14
+# CHECK-NEXT: Max number of mappings used:         6
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - CortexA55UnitALU
+# CHECK-NEXT: [0.1] - CortexA55UnitALU
+# CHECK-NEXT: [1]   - CortexA55UnitB
+# CHECK-NEXT: [2]   - CortexA55UnitDiv
+# CHECK-NEXT: [3.0] - CortexA55UnitFPALU
+# CHECK-NEXT: [3.1] - CortexA55UnitFPALU
+# CHECK-NEXT: [4]   - CortexA55UnitFPDIV
+# CHECK-NEXT: [5.0] - CortexA55UnitFPMAC
+# CHECK-NEXT: [5.1] - CortexA55UnitFPMAC
+# CHECK-NEXT: [6]   - CortexA55UnitLd
+# CHECK-NEXT: [7]   - CortexA55UnitMAC
+# CHECK-NEXT: [8]   - CortexA55UnitSt
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]
+# CHECK-NEXT: 1.00   1.00    -      -      -      -      -      -      -     2.00   1.00   1.00
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3.0]  [3.1]  [4]    [5.0]  [5.1]  [6]    [7]    [8]    Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     ldr	w4, [x2], #4
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     ldr	w5, [x3]
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     madd	w0, w5, w4, w0
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -     add	x3, x3, x13
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -     subs	x1, x1, #1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00   str	w0, [x21, x18, lsl #2]
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0123456789
+# CHECK-NEXT: Index     0123456789          0
+
+# CHECK:      [0,0]     DeeeER    .    .    .   ldr	w4, [x2], #4
+# CHECK-NEXT: [0,1]     D=eeeER   .    .    .   ldr	w5, [x3]
+# CHECK-NEXT: [0,2]     .D===eeeeER    .    .   madd	w0, w5, w4, w0
+# CHECK-NEXT: [0,3]     .   DeeeER.    .    .   add	x3, x3, x13
+# CHECK-NEXT: [0,4]     .    DeeeER    .    .   subs	x1, x1, #1
+# CHECK-NEXT: [0,5]     .    D==eeeeER .    .   str	w0, [x21, x18, lsl #2]
+# CHECK-NEXT: [1,0]     .    . DeeeER  .    .   ldr	w4, [x2], #4
+# CHECK-NEXT: [1,1]     .    .  DeeeER .    .   ldr	w5, [x3]
+# CHECK-NEXT: [1,2]     .    .  D===eeeeER  .   madd	w0, w5, w4, w0
+# CHECK-NEXT: [1,3]     .    .    .DeeeER   .   add	x3, x3, x13
+# CHECK-NEXT: [1,4]     .    .    . DeeeER  .   subs	x1, x1, #1
+# CHECK-NEXT: [1,5]     .    .    . D==eeeeER   str	w0, [x21, x18, lsl #2]
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     2     1.0    0.0    0.0       ldr	w4, [x2], #4
+# CHECK-NEXT: 1.     2     1.5    0.0    0.0       ldr	w5, [x3]
+# CHECK-NEXT: 2.     2     4.0    0.0    0.0       madd	w0, w5, w4, w0
+# CHECK-NEXT: 3.     2     1.0    0.0    0.0       add	x3, x3, x13
+# CHECK-NEXT: 4.     2     1.0    0.0    0.0       subs	x1, x1, #1
+# CHECK-NEXT: 5.     2     3.0    0.0    0.0       str	w0, [x21, x18, lsl #2]
+# CHECK-NEXT:        2     1.9    0.0    0.0       <total>
diff --git a/llvm/test/tools/llvm-mca/X86/in-order-cpu.s b/llvm/test/tools/llvm-mca/X86/in-order-cpu.s
--- a/llvm/test/tools/llvm-mca/X86/in-order-cpu.s
+++ b/llvm/test/tools/llvm-mca/X86/in-order-cpu.s
@@ -1,3 +1,3 @@
-# RUN: not llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=atom -o /dev/null 2>&1 | FileCheck %s
-
-# CHECK: error: please specify an out-of-order cpu. 'atom' is an in-order cpu.
+# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=atom -o /dev/null 2>&1 | FileCheck %s
+# CHECK: warning: support for in-order CPU 'atom' is experimental.
+movsbw	%al, %di
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -335,9 +335,8 @@
     return 1;
 
   if (!PrintInstructionTables && !STI->getSchedModel().isOutOfOrder()) {
-    WithColor::error() << "please specify an out-of-order cpu. '" << MCPU
-                       << "' is an in-order cpu.\n";
-    return 1;
+    WithColor::warning() << "support for in-order CPU '" << MCPU
+                         << "' is experimental.\n";
   }
 
   if (!STI->getSchedModel().hasInstrSchedModel()) {