diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -374,6 +374,9 @@
   /// dump the scheduled Sequence.
   void dumpSchedule() const;
+  /// Print an execution trace of the schedule to dbgs() as a table with one
+  /// column per cycle: each scheduled SUnit gets a row marking its issue
+  /// cycle, followed by one row per processor resource it books.
+  /// The top-down variant lays the table out by SUnit::TopReadyCycle, the
+  /// bottom-up variant by SUnit::BotReadyCycle.
+  void dumpScheduleTraceTopDown() const;
+  void dumpScheduleTraceBottomUp() const;
   // Lesser helpers...
   bool checkSchedLimit();
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -147,6 +147,21 @@
                          cl::desc("The threshold for fast cluster"),
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+// Off by default. When set, ScheduleDAGMI::dumpSchedule() prints a
+// cycle-by-cycle schedule table ahead of its per-SUnit listing.
+static cl::opt<bool> MISchedDumpScheduleTrace(
+    "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
+    cl::desc("Dump resource usage at schedule boundary."));
+// Width handed to llvm::left_justify for the leftmost table column, which
+// holds the "Cycle" label, the SU(n) names and the resource names.
+// Defaults to 19.
+static cl::opt<unsigned>
+    HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
+                   cl::desc("Set width of the columns with "
+                            "the resources and schedule units"),
+                   cl::init(19));
+// Width handed to llvm::left_justify for every per-cycle column of the table.
+// Defaults to 5.
+static cl::opt<unsigned>
+    ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
+             cl::desc("Set width of the columns showing resource booking."),
+             cl::init(5));
 // DAG subtrees must have at least this many nodes.
 static const unsigned MinSubtreeSize = 8;
@@ -930,8 +945,153 @@
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+// Legend printed right after the title of each schedule table; it explains
+// the 'i' and 'x' cell markers written by the dumpScheduleTrace* routines.
+static const char *scheduleTableLegend = "  i: issue\n  x: resource booked";
+/// Print the schedule to dbgs() as a table with one column per cycle, laid
+/// out by the top-down issue cycle (SUnit::TopReadyCycle) of each instruction.
+///
+/// Every SUnit gets a row with an 'i' in its issue cycle, followed by one row
+/// per processor resource of its scheduling class with an 'x' in each of the
+/// PI->Cycles consecutive cycles starting at the issue cycle. The first
+/// column is HeaderColWidth wide, every cycle column is ColWidth wide.
+/// Nothing is printed when no instruction scheduling model is available, when
+/// the block holds fewer than two instructions, or when no instruction has an
+/// SUnit.
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
+  // Bail out when there is no schedule model to query: the per-resource
+  // lookups below need an instruction-level scheduling model.
+  if (!SchedModel.hasInstrSchedModel())
+    return;
+  // Nothing to show if there is no or just one instruction.
+  if (BB->size() < 2)
+    return;
+  // The table bounds come from the first and the last instruction that own an
+  // SUnit. Instructions without one are tolerated by the loops below, so they
+  // must not be dereferenced here either.
+  const SUnit *FirstSU = nullptr;
+  const SUnit *LastSU = nullptr;
+  for (MachineInstr &MI : *this) {
+    if (const SUnit *SU = getSUnit(&MI)) {
+      if (!FirstSU)
+        FirstSU = SU;
+      LastSU = SU;
+    }
+  }
+  if (!FirstSU)
+    return;
+  dbgs() << " * Schedule table (TopDown):\n";
+  dbgs() << scheduleTableLegend << "\n";
+  const unsigned FirstCycle = FirstSU->TopReadyCycle;
+  unsigned LastCycle = LastSU->TopReadyCycle;
+  // Extend the table so that it covers the last cycle of every booking.
+  for (MachineInstr &MI : *this) {
+    SUnit *SU = getSUnit(&MI);
+    if (!SU)
+      continue;
+    const MCSchedClassDesc *SC = getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+                                       PE = SchedModel.getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      // Compare one-past-the-end values: "TopReadyCycle + Cycles - 1" would
+      // wrap around in unsigned arithmetic when both operands are zero.
+      const unsigned BookedEnd = SU->TopReadyCycle + PI->Cycles;
+      if (BookedEnd > LastCycle + 1)
+        LastCycle = BookedEnd - 1;
+    }
+  }
+  // Print the header with the cycles
+  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
+  for (unsigned C = FirstCycle; C <= LastCycle; ++C)
+    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
+  dbgs() << "|\n";
+  for (MachineInstr &MI : *this) {
+    SUnit *SU = getSUnit(&MI);
+    if (!SU) {
+      dbgs() << "Missing SUnit\n";
+      continue;
+    }
+    // Issue row: a single 'i' in the cycle the instruction is issued.
+    std::string NodeName("SU(");
+    NodeName += std::to_string(SU->NodeNum) + ")";
+    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
+    for (unsigned C = FirstCycle; C <= LastCycle; ++C) {
+      if (C == SU->TopReadyCycle)
+        dbgs() << llvm::left_justify("| i", ColWidth);
+      else
+        dbgs() << llvm::left_justify("|", ColWidth);
+    }
+    dbgs() << "|\n";
+    // Resource rows: one per processor resource of the scheduling class.
+    const MCSchedClassDesc *SC = getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+                                       PE = SchedModel.getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      const std::string ResName =
+          SchedModel.getResourceName(PI->ProcResourceIdx);
+      dbgs() << llvm::left_justify(ResName, HeaderColWidth);
+      unsigned C = FirstCycle;
+      // Empty cells before the issue cycle.
+      for (; C < SU->TopReadyCycle; ++C)
+        dbgs() << llvm::left_justify("|", ColWidth);
+      // Booked cells, starting at the issue cycle.
+      for (unsigned i = 0; i < PI->Cycles; ++i, ++C)
+        dbgs() << llvm::left_justify("| x", ColWidth);
+      // Empty cells up to and including the last cycle of the table.
+      for (; C <= LastCycle; ++C)
+        dbgs() << llvm::left_justify("|", ColWidth);
+      // Place end char
+      dbgs() << "| \n";
+    }
+  }
+/// Print the schedule to dbgs() as a table with one column per cycle, laid
+/// out by the bottom-up issue cycle (SUnit::BotReadyCycle) of each
+/// instruction. Cycle numbers decrease from left to right.
+///
+/// Every SUnit gets a row with an 'i' in its issue cycle, followed by one row
+/// per processor resource of its scheduling class with an 'x' in each of the
+/// PI->Cycles consecutive columns starting at the issue cycle. The first
+/// column is HeaderColWidth wide, every cycle column is ColWidth wide.
+/// Nothing is printed when no instruction scheduling model is available, when
+/// the block holds fewer than two instructions, or when no instruction has an
+/// SUnit.
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
+  // Bail out when there is no schedule model to query: the per-resource
+  // lookups below need an instruction-level scheduling model.
+  if (!SchedModel.hasInstrSchedModel())
+    return;
+  // Nothing to show if there is no or just one instruction.
+  if (BB->size() < 2)
+    return;
+  // The table bounds come from the first and the last instruction that own an
+  // SUnit. Instructions without one are tolerated by the loops below, so they
+  // must not be dereferenced here either.
+  const SUnit *FirstSU = nullptr;
+  const SUnit *LastSU = nullptr;
+  for (MachineInstr &MI : *this) {
+    if (const SUnit *SU = getSUnit(&MI)) {
+      if (!FirstSU)
+        FirstSU = SU;
+      LastSU = SU;
+    }
+  }
+  if (!FirstSU)
+    return;
+  dbgs() << " * Schedule table (BottomUp):\n";
+  dbgs() << scheduleTableLegend << "\n";
+  // Signed arithmetic: the cycle of a booking may drop below zero.
+  const int FirstCycle = static_cast<int>(FirstSU->BotReadyCycle);
+  int LastCycle = static_cast<int>(LastSU->BotReadyCycle);
+  // Extend the table so that it covers the last column of every booking.
+  for (MachineInstr &MI : *this) {
+    SUnit *SU = getSUnit(&MI);
+    if (!SU)
+      continue;
+    const MCSchedClassDesc *SC = getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+                                       PE = SchedModel.getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      const int BookedEnd =
+          static_cast<int>(SU->BotReadyCycle) - PI->Cycles + 1;
+      if (BookedEnd < LastCycle)
+        LastCycle = BookedEnd;
+    }
+  }
+  // Print the header with the cycles
+  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
+  for (int C = FirstCycle; C >= LastCycle; --C)
+    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
+  dbgs() << "|\n";
+  for (MachineInstr &MI : *this) {
+    SUnit *SU = getSUnit(&MI);
+    if (!SU) {
+      dbgs() << "Missing SUnit\n";
+      continue;
+    }
+    const int IssueCycle = static_cast<int>(SU->BotReadyCycle);
+    // Issue row: a single 'i' in the cycle the instruction is issued.
+    std::string NodeName("SU(");
+    NodeName += std::to_string(SU->NodeNum) + ")";
+    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
+    for (int C = FirstCycle; C >= LastCycle; --C) {
+      if (C == IssueCycle)
+        dbgs() << llvm::left_justify("| i", ColWidth);
+      else
+        dbgs() << llvm::left_justify("|", ColWidth);
+    }
+    dbgs() << "|\n";
+    // Resource rows: one per processor resource of the scheduling class.
+    const MCSchedClassDesc *SC = getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+                                       PE = SchedModel.getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      const std::string ResName =
+          SchedModel.getResourceName(PI->ProcResourceIdx);
+      dbgs() << llvm::left_justify(ResName, HeaderColWidth);
+      int C = FirstCycle;
+      // Empty cells before the issue cycle.
+      for (; C > IssueCycle; --C)
+        dbgs() << llvm::left_justify("|", ColWidth);
+      // Booked cells, starting at the issue cycle.
+      for (unsigned i = 0; i < PI->Cycles; ++i, --C)
+        dbgs() << llvm::left_justify("| x", ColWidth);
+      // Empty cells down to and including the last cycle of the table.
+      for (; C >= LastCycle; --C)
+        dbgs() << llvm::left_justify("|", ColWidth);
+      // Place end char
+      dbgs() << "| \n";
+    }
+  }
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
+  if (MISchedDumpScheduleTrace) {
+    if (ForceTopDown)
+      dumpScheduleTraceTopDown();
+    else if (ForceBottomUp)
+      dumpScheduleTraceBottomUp();
+    else {
+      dbgs() << "* Schedule table (Bidirectional): not implemented\n";
+    }
+  }
   for (MachineInstr &MI : *this) {
     if (SUnit *SU = getSUnit(&MI))
diff --git a/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir b/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir
@@ -0,0 +1,83 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55  \
+# RUN:  -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN:  -misched-topdown=true -sched-print-cycles=true \
+# RUN:  -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-header-width=21 \
+# RUN:  2>&1 | FileCheck %s --check-prefix=TOP --strict-whitespace
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55  \
+# RUN:  -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN:  -misched-bottomup=true -sched-print-cycles=true \
+# RUN:  -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-width=4 \
+# RUN:  2>&1 | FileCheck %s --check-prefix=BOTTOM  --strict-whitespace
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55  \
+# RUN:  -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN:  -sched-print-cycles=true -misched-dump-schedule-trace=true \
+# RUN:  2>&1 | FileCheck %s --check-prefix=BIDIRECTIONAL
+# REQUIRES: asserts, aarch64-registered-target
+name: f
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $x2, $x6, $q0
+    %14:fpr128 = EXTv16i8 $q0, $q0, 8
+    $x3 = ADDXrr $x0, $x0
+    $x4 = ADDXrr $x1, $x1
+    $x5 = ADDXrr $x2, $x2
+    $x7 = ADDXrr $x6, $x6
+# TOP-LABEL: *** Final schedule for %bb.0 ***
+# TOP-NEXT:  * Schedule table (TopDown):
+# TOP-NEXT:   i: issue
+# TOP-NEXT:   x: resource booked
+# TOP-NEXT: Cycle                | 0  | 1  | 2  |
+# TOP-NEXT: SU(0)                | i  |    |    |
+# TOP-NEXT: CortexA55UnitFPALU   | x  | x  |    |
+# TOP-NEXT: SU(1)                | i  |    |    |
+# TOP-NEXT: CortexA55UnitALU     | x  |    |    |
+# TOP-NEXT: SU(2)                |    | i  |    |
+# TOP-NEXT: CortexA55UnitALU     |    | x  |    |
+# TOP-NEXT: SU(3)                |    | i  |    |
+# TOP-NEXT: CortexA55UnitALU     |    | x  |    |
+# TOP-NEXT: SU(4)                |    |    | i  |
+# TOP-NEXT: CortexA55UnitALU     |    |    | x  |
+# TOP-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 3]:   dead %0:fpr128 = EXTv16i8 $q0, $q0, 8
+# TOP-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 0]:   $x3 = ADDXrr $x0, $x0
+# TOP-NEXT: SU(2) [TopReadyCycle = 1, BottomReadyCycle = 0]:   $x4 = ADDXrr $x1, $x1
+# TOP-NEXT: SU(3) [TopReadyCycle = 1, BottomReadyCycle = 0]:   $x5 = ADDXrr $x2, $x2
+# TOP-NEXT: SU(4) [TopReadyCycle = 2, BottomReadyCycle = 0]:   $x7 = ADDXrr $x6, $x6
+# BOTTOM-LABEL: *** Final schedule for %bb.0 ***
+# BOTTOM-NEXT:  * Schedule table (BottomUp):
+# BOTTOM-NEXT:   i: issue
+# BOTTOM-NEXT:   x: resource booked
+# BOTTOM-NEXT: Cycle              | 3 | 2 | 1 | 0 |
+# BOTTOM-NEXT: SU(0)              | i |   |   |   |
+# BOTTOM-NEXT: CortexA55UnitFPALU | x | x |   |   |
+# BOTTOM-NEXT: SU(1)              |   |   | i |   |
+# BOTTOM-NEXT: CortexA55UnitALU   |   |   | x |   |
+# BOTTOM-NEXT: SU(2)              |   |   | i |   |
+# BOTTOM-NEXT: CortexA55UnitALU   |   |   | x |   |
+# BOTTOM-NEXT: SU(3)              |   |   |   | i |
+# BOTTOM-NEXT: CortexA55UnitALU   |   |   |   | x |
+# BOTTOM-NEXT: SU(4)              |   |   |   | i |
+# BOTTOM-NEXT: CortexA55UnitALU   |   |   |   | x |
+# BOTTOM-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 3]:   dead %0:fpr128 = EXTv16i8 $q0, $q0, 8
+# BOTTOM-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 1]:   $x3 = ADDXrr $x0, $x0
+# BOTTOM-NEXT: SU(2) [TopReadyCycle = 0, BottomReadyCycle = 1]:   $x4 = ADDXrr $x1, $x1
+# BOTTOM-NEXT: SU(3) [TopReadyCycle = 0, BottomReadyCycle = 0]:   $x5 = ADDXrr $x2, $x2
+# BOTTOM-NEXT: SU(4) [TopReadyCycle = 0, BottomReadyCycle = 0]:   $x7 = ADDXrr $x6, $x6
+# This test shows that at the moment we cannot generate the trace of
+# bidirectional scheduling as the values of TopReadyCycle and
+# BottomReadyCycle are inconsistent.
+# BIDIRECTIONAL-LABEL: *** Final schedule for %bb.0 ***
+# BIDIRECTIONAL-NEXT:  * Schedule table (Bidirectional): not implemented
+# BIDIRECTIONAL-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 3]:   dead %0:fpr128 = EXTv16i8 $q0, $q0, 8
+# BIDIRECTIONAL-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 1]:   $x3 = ADDXrr $x0, $x0
+# BIDIRECTIONAL-NEXT: SU(2) [TopReadyCycle = 0, BottomReadyCycle = 1]:   $x4 = ADDXrr $x1, $x1
+# BIDIRECTIONAL-NEXT: SU(3) [TopReadyCycle = 0, BottomReadyCycle = 0]:   $x5 = ADDXrr $x2, $x2
+# BIDIRECTIONAL-NEXT: SU(4) [TopReadyCycle = 0, BottomReadyCycle = 0]:   $x7 = ADDXrr $x6, $x6