diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -374,6 +374,9 @@
 
   /// dump the scheduled Sequence.
   void dumpSchedule() const;
+  /// Print execution trace of the schedule top-down or bottom-up.
+  void dumpScheduleTraceTopDown() const;
+  void dumpScheduleTraceBottomUp() const;
 
   // Lesser helpers...
   bool checkSchedLimit();
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -147,6 +147,21 @@
                          cl::desc("The threshold for fast cluster"),
                          cl::init(1000));
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static cl::opt<bool> MISchedDumpScheduleTrace(
+    "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
+    cl::desc("Dump resource usage at schedule boundary."));
+static cl::opt<unsigned>
+    HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
+                   cl::desc("Set width of the columns with "
+                            "the resources and schedule units"),
+                   cl::init(19));
+static cl::opt<unsigned>
+    ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
+             cl::desc("Set width of the columns showing resource booking."),
+             cl::init(5));
+#endif
+
 // DAG subtrees must have at least this many nodes.
 static const unsigned MinSubtreeSize = 8;
 
@@ -930,8 +945,169 @@
   }
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static const char *scheduleTableLegend = " i: issue\n x: resource booked";
+
+/// Print the schedule of the current region as a table of cycles, walking
+/// top-down: each SUnit gets a row with 'i' at its TopReadyCycle, followed by
+/// one row per processor resource it writes with 'x' on every booked cycle.
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
+  // Bail out when there is no schedule model to query.
+  if (!SchedModel.hasInstrSchedModel())
+    return;
+
+  // Nothing to show if there is no or just one instruction.
+  if (BB->size() < 2)
+    return;
+
+  dbgs() << " * Schedule table (TopDown):\n";
+  dbgs() << scheduleTableLegend << "\n";
+  const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
+  unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
+  // Stretch the table so it covers the last cycle any resource stays booked.
+  for (MachineInstr &MI : *this) {
+    SUnit *SU = getSUnit(&MI);
+    if (!SU)
+      continue;
+    const MCSchedClassDesc *SC = getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+                                       PE = SchedModel.getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      if (SU->TopReadyCycle + PI->Cycles - 1 > LastCycle)
+        LastCycle = SU->TopReadyCycle + PI->Cycles - 1;
+    }
+  }
+  // Print the header with the cycles
+  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
+  for (unsigned C = FirstCycle; C <= LastCycle; ++C)
+    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
+  dbgs() << "|\n";
+
+  for (MachineInstr &MI : *this) {
+    SUnit *SU = getSUnit(&MI);
+    if (!SU) {
+      dbgs() << "Missing SUnit\n";
+      continue;
+    }
+    std::string NodeName("SU(");
+    NodeName += std::to_string(SU->NodeNum) + ")";
+    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
+    unsigned C = FirstCycle;
+    for (; C <= LastCycle; ++C) {
+      if (C == SU->TopReadyCycle)
+        dbgs() << llvm::left_justify("| i", ColWidth);
+      else
+        dbgs() << llvm::left_justify("|", ColWidth);
+    }
+    dbgs() << "|\n";
+    const MCSchedClassDesc *SC = getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+                                       PE = SchedModel.getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      C = FirstCycle;
+      const std::string ResName =
+          SchedModel.getResourceName(PI->ProcResourceIdx);
+      dbgs() << llvm::left_justify(ResName, HeaderColWidth);
+      for (; C < SU->TopReadyCycle; ++C) {
+        dbgs() << llvm::left_justify("|", ColWidth);
+      }
+      for (unsigned i = 0; i < PI->Cycles; ++i, ++C)
+        dbgs() << llvm::left_justify("| x", ColWidth);
+      while (C++ <= LastCycle)
+        dbgs() << llvm::left_justify("|", ColWidth);
+      // Place end char
+      dbgs() << "| \n";
+    }
+  }
+}
+
+/// Print the same kind of table for a bottom-up schedule: columns count down
+/// from the first instruction's BotReadyCycle, 'i' marks each SUnit's
+/// BotReadyCycle, and resource bookings ('x') extend toward lower cycles.
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
+  // Bail out when there is no schedule model to query.
+  if (!SchedModel.hasInstrSchedModel())
+    return;
+
+  // Nothing to show if there is no or just one instruction.
+  if (BB->size() < 2)
+    return;
+
+  dbgs() << " * Schedule table (BottomUp):\n";
+  dbgs() << scheduleTableLegend << "\n";
+
+  const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
+  int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
+  // Signed arithmetic: a booking may reach below cycle 0 when counting down.
+  for (MachineInstr &MI : *this) {
+    SUnit *SU = getSUnit(&MI);
+    if (!SU)
+      continue;
+    const MCSchedClassDesc *SC = getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+                                       PE = SchedModel.getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      if (static_cast<int>(SU->BotReadyCycle) - PI->Cycles + 1 < LastCycle)
+        LastCycle = static_cast<int>(SU->BotReadyCycle) - PI->Cycles + 1;
+    }
+  }
+  // Print the header with the cycles
+  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
+  for (int C = FirstCycle; C >= LastCycle; --C)
+    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
+  dbgs() << "|\n";
+
+  for (MachineInstr &MI : *this) {
+    SUnit *SU = getSUnit(&MI);
+    if (!SU) {
+      dbgs() << "Missing SUnit\n";
+      continue;
+    }
+    std::string NodeName("SU(");
+    NodeName += std::to_string(SU->NodeNum) + ")";
+    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
+    int C = FirstCycle;
+    for (; C >= LastCycle; --C) {
+      if (C == static_cast<int>(SU->BotReadyCycle))
+        dbgs() << llvm::left_justify("| i", ColWidth);
+      else
+        dbgs() << llvm::left_justify("|", ColWidth);
+    }
+    dbgs() << "|\n";
+    const MCSchedClassDesc *SC = getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+                                       PE = SchedModel.getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      C = FirstCycle;
+      const std::string ResName =
+          SchedModel.getResourceName(PI->ProcResourceIdx);
+      dbgs() << llvm::left_justify(ResName, HeaderColWidth);
+      for (; C > static_cast<int>(SU->BotReadyCycle); --C) {
+        dbgs() << llvm::left_justify("|", ColWidth);
+      }
+      for (unsigned i = 0; i < PI->Cycles; ++i, --C)
+        dbgs() << llvm::left_justify("| x", ColWidth);
+      while (C-- >= LastCycle)
+        dbgs() << llvm::left_justify("|", ColWidth);
+      // Place end char
+      dbgs() << "| \n";
+    }
+  }
+}
+#endif
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
+  if (MISchedDumpScheduleTrace) {
+    if (ForceTopDown)
+      dumpScheduleTraceTopDown();
+    else if (ForceBottomUp)
+      dumpScheduleTraceBottomUp();
+    else {
+      dbgs() << "* Schedule table (Bidirectional): not implemented\n";
+    }
+  }
+
   for (MachineInstr &MI : *this) {
     if (SUnit *SU = getSUnit(&MI))
       dumpNode(*SU);
diff --git a/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir b/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir
@@ -0,0 +1,83 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 \
+# RUN:  -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN:  -misched-topdown=true -sched-print-cycles=true \
+# RUN:  -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-header-width=21 \
+# RUN:  2>&1 | FileCheck %s --check-prefix=TOP --strict-whitespace
+
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 \
+# RUN:  -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN:  -misched-bottomup=true -sched-print-cycles=true \
+# RUN:  -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-width=4 \
+# RUN:  2>&1 | FileCheck %s --check-prefix=BOTTOM --strict-whitespace
+
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 \
+# RUN:  -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN:  -sched-print-cycles=true -misched-dump-schedule-trace=true \
+# RUN:  2>&1 | FileCheck %s --check-prefix=BIDIRECTIONAL
+
+# REQUIRES: asserts, aarch64-registered-target
+---
+name: f
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2, $x6, $q0
+    %14:fpr128 = EXTv16i8 $q0, $q0, 8
+    $x3 = ADDXrr $x0, $x0
+    $x4 = ADDXrr $x1, $x1
+    $x5 = ADDXrr $x2, $x2
+    $x7 = ADDXrr $x6, $x6
+
+# TOP-LABEL: *** Final schedule for %bb.0 ***
+# TOP-NEXT: * Schedule table (TopDown):
+# TOP-NEXT: i: issue
+# TOP-NEXT: x: resource booked
+# TOP-NEXT: Cycle                | 0  | 1  | 2  |
+# TOP-NEXT: SU(0)                | i  |    |    |
+# TOP-NEXT: CortexA55UnitFPALU   | x  | x  |    |
+# TOP-NEXT: SU(1)                | i  |    |    |
+# TOP-NEXT: CortexA55UnitALU     | x  |    |    |
+# TOP-NEXT: SU(2)                |    | i  |    |
+# TOP-NEXT: CortexA55UnitALU     |    | x  |    |
+# TOP-NEXT: SU(3)                |    | i  |    |
+# TOP-NEXT: CortexA55UnitALU     |    | x  |    |
+# TOP-NEXT: SU(4)                |    |    | i  |
+# TOP-NEXT: CortexA55UnitALU     |    |    | x  |
+# TOP-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 3]:   dead %0:fpr128 = EXTv16i8 $q0, $q0, 8
+# TOP-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 0]:   $x3 = ADDXrr $x0, $x0
+# TOP-NEXT: SU(2) [TopReadyCycle = 1, BottomReadyCycle = 0]:   $x4 = ADDXrr $x1, $x1
+# TOP-NEXT: SU(3) [TopReadyCycle = 1, BottomReadyCycle = 0]:   $x5 = ADDXrr $x2, $x2
+# TOP-NEXT: SU(4) [TopReadyCycle = 2, BottomReadyCycle = 0]:   $x7 = ADDXrr $x6, $x6
+
+# BOTTOM-LABEL: *** Final schedule for %bb.0 ***
+# BOTTOM-NEXT: * Schedule table (BottomUp):
+# BOTTOM-NEXT: i: issue
+# BOTTOM-NEXT: x: resource booked
+# BOTTOM-NEXT: Cycle              | 3 | 2 | 1 | 0 |
+# BOTTOM-NEXT: SU(0)              | i |   |   |   |
+# BOTTOM-NEXT: CortexA55UnitFPALU | x | x |   |   |
+# BOTTOM-NEXT: SU(1)              |   |   | i |   |
+# BOTTOM-NEXT: CortexA55UnitALU   |   |   | x |   |
+# BOTTOM-NEXT: SU(2)              |   |   | i |   |
+# BOTTOM-NEXT: CortexA55UnitALU   |   |   | x |   |
+# BOTTOM-NEXT: SU(3)              |   |   |   | i |
+# BOTTOM-NEXT: CortexA55UnitALU   |   |   |   | x |
+# BOTTOM-NEXT: SU(4)              |   |   |   | i |
+# BOTTOM-NEXT: CortexA55UnitALU   |   |   |   | x |
+# BOTTOM-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 3]:   dead %0:fpr128 = EXTv16i8 $q0, $q0, 8
+# BOTTOM-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 1]:   $x3 = ADDXrr $x0, $x0
+# BOTTOM-NEXT: SU(2) [TopReadyCycle = 0, BottomReadyCycle = 1]:   $x4 = ADDXrr $x1, $x1
+# BOTTOM-NEXT: SU(3) [TopReadyCycle = 0, BottomReadyCycle = 0]:   $x5 = ADDXrr $x2, $x2
+# BOTTOM-NEXT: SU(4) [TopReadyCycle = 0, BottomReadyCycle = 0]:   $x7 = ADDXrr $x6, $x6
+
+# This test shows that at the moment we cannot generate the trace of
+# bidirectional scheduling as the values of TopReadyCycle and
+# BottomReadyCycle are inconsistent.
+
+# BIDIRECTIONAL-LABEL: *** Final schedule for %bb.0 ***
+# BIDIRECTIONAL-NEXT: * Schedule table (Bidirectional): not implemented
+# BIDIRECTIONAL-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 3]:   dead %0:fpr128 = EXTv16i8 $q0, $q0, 8
+# BIDIRECTIONAL-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 1]:   $x3 = ADDXrr $x0, $x0
+# BIDIRECTIONAL-NEXT: SU(2) [TopReadyCycle = 0, BottomReadyCycle = 1]:   $x4 = ADDXrr $x1, $x1
+# BIDIRECTIONAL-NEXT: SU(3) [TopReadyCycle = 0, BottomReadyCycle = 0]:   $x5 = ADDXrr $x2, $x2
+# BIDIRECTIONAL-NEXT: SU(4) [TopReadyCycle = 0, BottomReadyCycle = 0]:   $x7 = ADDXrr $x6, $x6