Index: llvm/include/llvm/CodeGen/ScheduleDAG.h
===================================================================
--- llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -235,6 +235,12 @@
              "SDep::Output edge cannot use the zero register!");
       Contents.Reg = Reg;
     }
+
+    /// Prints the kind and latency of this edge to \p O, followed by the
+    /// assigned register of a Data edge (only when \p TRI is non-null) or
+    /// the ordering kind of an Order edge. Returns \p O.
+    raw_ostream &print(raw_ostream &O,
+                       const TargetRegisterInfo *TRI = nullptr) const;
   };
 
   template <>
@@ -458,7 +464,13 @@
 
     void dump(const ScheduleDAG *G) const;
     void dumpAll(const ScheduleDAG *G) const;
-    void print(raw_ostream &O, const ScheduleDAG *G) const;
+    /// Prints "EntrySU" if this node is \p Entry, "ExitSU" if it is
+    /// \p Exit, and "SU(<NodeNum>)" otherwise. Returns \p O.
+    raw_ostream &print(raw_ostream &O,
+                       const SUnit *Entry = nullptr,
+                       const SUnit *Exit = nullptr) const;
+    /// Same as above, using the EntrySU and ExitSU nodes of \p G.
+    raw_ostream &print(raw_ostream &O, const ScheduleDAG *G) const;
 
   private:
     void ComputeDepth();
Index: llvm/lib/CodeGen/ScheduleDAG.cpp
===================================================================
--- llvm/lib/CodeGen/ScheduleDAG.cpp
+++ llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -67,6 +67,41 @@
   return &TII->get(Node->getMachineOpcode());
 }
 
+LLVM_DUMP_METHOD
+raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+  switch (getKind()) {
+  case Data:   OS << "Data"; break;
+  case Anti:   OS << "Anti"; break;
+  case Output: OS << "Out "; break;
+  case Order:  OS << "Ord "; break;
+  }
+
+  // Every kind of edge reports its latency.
+  OS << " Latency=" << getLatency();
+
+  switch (getKind()) {
+  case Data:
+    if (TRI && isAssignedRegDep())
+      OS << " Reg=" << PrintReg(getReg(), TRI);
+    break;
+  case Anti:
+  case Output:
+    break;
+  case Order:
+    switch (Contents.OrdKind) {
+    case Barrier:      OS << " Barrier"; break;
+    case MayAliasMem:
+    case MustAliasMem: OS << " Memory"; break;
+    case Artificial:   OS << " Artificial"; break;
+    case Weak:         OS << " Weak"; break;
+    case Cluster:      OS << " Cluster"; break;
+    }
+    break;
+  }
+
+  return OS;
+}
+
 bool SUnit::addPred(const SDep &D, bool Required) {
   // If this node already has this dependence, don't add a redundant one.
for (SDep &PredDep : Preds) { @@ -302,16 +337,24 @@ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD -void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const { - if (this == &DAG->ExitSU) - OS << "ExitSU"; - else if (this == &DAG->EntrySU) +raw_ostream &SUnit::print(raw_ostream &OS, + const SUnit *Entry, const SUnit *Exit) const { + if (this == Entry) OS << "EntrySU"; + else if (this == Exit) + OS << "ExitSU"; else OS << "SU(" << NodeNum << ")"; + return OS; +} + +LLVM_DUMP_METHOD +raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const { + return print(OS, &G->EntrySU, &G->ExitSU); } -LLVM_DUMP_METHOD void SUnit::dump(const ScheduleDAG *G) const { +LLVM_DUMP_METHOD +void SUnit::dump(const ScheduleDAG *G) const { print(dbgs(), G); dbgs() << ": "; G->dumpNode(this); @@ -333,40 +376,18 @@ if (Preds.size() != 0) { dbgs() << " Predecessors:\n"; - for (const SDep &SuccDep : Preds) { - dbgs() << " "; - switch (SuccDep.getKind()) { - case SDep::Data: dbgs() << "data "; break; - case SDep::Anti: dbgs() << "anti "; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ord "; break; - } - SuccDep.getSUnit()->print(dbgs(), G); - if (SuccDep.isArtificial()) - dbgs() << " *"; - dbgs() << ": Latency=" << SuccDep.getLatency(); - if (SuccDep.isAssignedRegDep()) - dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI); - dbgs() << "\n"; + for (const SDep &Dep : Preds) { + dbgs() << " "; + Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; + Dep.print(dbgs(), G->TRI); dbgs() << '\n'; } } if (Succs.size() != 0) { dbgs() << " Successors:\n"; - for (const SDep &SuccDep : Succs) { - dbgs() << " "; - switch (SuccDep.getKind()) { - case SDep::Data: dbgs() << "data "; break; - case SDep::Anti: dbgs() << "anti "; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ord "; break; - } - SuccDep.getSUnit()->print(dbgs(), G); - if (SuccDep.isArtificial()) - dbgs() << " *"; - dbgs() << ": 
Latency=" << SuccDep.getLatency(); - if (SuccDep.isAssignedRegDep()) - dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI); - dbgs() << "\n"; + for (const SDep &Dep : Succs) { + dbgs() << " "; + Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; + Dep.print(dbgs(), G->TRI); dbgs() << '\n'; } } } Index: llvm/test/CodeGen/AArch64/arm64-csldst-mmo.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-csldst-mmo.ll +++ llvm/test/CodeGen/AArch64/arm64-csldst-mmo.ll @@ -13,9 +13,9 @@ ; CHECK: SU(2): STRWui %WZR ; CHECK: SU(3): %X21, %X20 = LDPXi %SP ; CHECK: Predecessors: -; CHECK-NEXT: out SU(0) -; CHECK-NEXT: out SU(0) -; CHECK-NEXT: ord SU(0) +; CHECK-NEXT: SU(0): Out +; CHECK-NEXT: SU(0): Out +; CHECK-NEXT: SU(0): Ord ; CHECK-NEXT: Successors: define void @test1() { entry: Index: llvm/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll +++ llvm/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll @@ -8,8 +8,8 @@ ; CHECK: shiftable ; CHECK: SU(2): %vreg2 = SUBXri %vreg1, 20, 0 ; CHECK: Successors: -; CHECK-NEXT: data SU(4): Latency=1 Reg=%vreg2 -; CHECK-NEXT: data SU(3): Latency=2 Reg=%vreg2 +; CHECK-NEXT: SU(4): Data Latency=1 Reg=%vreg2 +; CHECK-NEXT: SU(3): Data Latency=2 Reg=%vreg2 ; CHECK: ********** INTERVALS ********** define i64 @shiftable(i64 %A, i64 %B) { %tmp0 = sub i64 %B, 20 Index: llvm/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll +++ llvm/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll @@ -7,11 +7,11 @@ ; CHECK: misched_bug:BB#0 entry ; CHECK: SU(2): %vreg2 = LDRWui %vreg0, 1; mem:LD4[%ptr1_plus1] GPR32:%vreg2 GPR64common:%vreg0 ; CHECK: Successors: -; CHECK-NEXT: data SU(5): Latency=4 Reg=%vreg2 -; CHECK-NEXT: ord SU(4): Latency=0 +; 
CHECK-NEXT: SU(5): Data Latency=4 Reg=%vreg2 +; CHECK-NEXT: SU(4): Ord Latency=0 ; CHECK: SU(3): STRWui %WZR, %vreg0, 0; mem:ST4[%ptr1] GPR64common:%vreg0 ; CHECK: Successors: -; CHECK: ord SU(4): Latency=0 +; CHECK: SU(4): Ord Latency=0 ; CHECK: SU(4): STRWui %WZR, %vreg1, 0; mem:ST4[%ptr2] GPR64common:%vreg1 ; CHECK: SU(5): %W0 = COPY %vreg2; GPR32:%vreg2 ; CHECK: ** ScheduleDAGMI::schedule picking next node Index: llvm/test/CodeGen/AArch64/tailcall_misched_graph.ll =================================================================== --- llvm/test/CodeGen/AArch64/tailcall_misched_graph.ll +++ llvm/test/CodeGen/AArch64/tailcall_misched_graph.ll @@ -37,8 +37,8 @@ ; CHECK: SU({{.*}}): [[VRB]] = LDRXui ; CHECK-NOT: SU ; CHECK: Successors: -; CHECK: ord SU([[DEPSTOREB:.*]]): Latency=0 -; CHECK: ord SU([[DEPSTOREA:.*]]): Latency=0 +; CHECK: SU([[DEPSTOREB:.*]]): Ord Latency=0 +; CHECK: SU([[DEPSTOREA:.*]]): Ord Latency=0 ; CHECK: SU([[DEPSTOREA]]): STRXui %vreg{{.*}}, ; CHECK: SU([[DEPSTOREB]]): STRXui %vreg{{.*}}, Index: llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll =================================================================== --- llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll +++ llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll @@ -6,23 +6,23 @@ ; CHECK: ** List Scheduling ; CHECK: SU(2){{.*}}STR{{.*}}Volatile -; CHECK-NOT: ord SU -; CHECK: ord SU(3): Latency=1 -; CHECK-NOT: ord SU +; CHECK-NOT: SU({{.*}}): Ord +; CHECK: SU(3): Ord Latency=1 +; CHECK-NOT: SU({{.*}}): Ord ; CHECK: SU(3){{.*}}LDR{{.*}}Volatile -; CHECK-NOT: ord SU -; CHECK: ord SU(2): Latency=1 -; CHECK-NOT: ord SU +; CHECK-NOT: SU({{.*}}): Ord +; CHECK: SU(2): Ord Latency=1 +; CHECK-NOT: SU({{.*}}): Ord ; CHECK: Successors: ; CHECK: ** List Scheduling ; CHECK: SU(2){{.*}}STR{{.*}} -; CHECK-NOT: ord SU -; CHECK: ord SU(3): Latency=1 -; CHECK-NOT: ord SU +; CHECK-NOT: SU({{.*}}): Ord +; CHECK: SU(3): Ord Latency=1 +; CHECK-NOT: SU({{.*}}): Ord ; CHECK: SU(3){{.*}}LDR{{.*}} -; 
CHECK-NOT: ord SU -; CHECK: ord SU(2): Latency=1 -; CHECK-NOT: ord SU +; CHECK-NOT: SU({{.*}}): Ord +; CHECK: SU(2): Ord Latency=1 +; CHECK-NOT: SU({{.*}}): Ord ; CHECK: Successors: define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind { entry: Index: llvm/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll @@ -13,13 +13,13 @@ ; CHECK: rdefs left ; CHECK-NEXT: Latency : 4 ; CHECK: Successors: -; CHECK: data +; CHECK: Data ; CHECK-SAME: Latency=1 -; CHECK-NEXT: data +; CHECK-NEXT: Data ; CHECK-SAME: Latency=3 -; CHECK-NEXT: data +; CHECK-NEXT: Data ; CHECK-SAME: Latency=3 -; CHECK-NEXT: data +; CHECK-NEXT: Data ; CHECK-SAME: Latency=4 define i32 @bar(i32 %a1, i32 %b1, i32 %c1) minsize optsize { %1 = load i32, i32* @a, align 4 Index: llvm/test/CodeGen/ARM/cortex-a57-misched-ldm.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-ldm.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-ldm.ll @@ -8,9 +8,9 @@ ; CHECK: rdefs left ; CHECK-NEXT: Latency : 3 ; CHECK: Successors: -; CHECK: data +; CHECK: Data ; CHECK-SAME: Latency=3 -; CHECK-NEXT: data +; CHECK-NEXT: Data ; CHECK-SAME: Latency=3 define i32 @foo(i32* %a) nounwind optsize { Index: llvm/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll @@ -10,7 +10,7 @@ ; CHECK: rdefs left ; CHECK-NEXT: Latency : 2 ; CHECK: Successors -; CHECK: data +; CHECK: Data ; CHECK-SAME: Latency=1 define i32 @bar(i32 %v0, i32 %v1, i32 %v2, i32* %addr) { Index: llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll =================================================================== --- 
llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll @@ -10,7 +10,7 @@ ; > VMULS common latency = 5 ; CHECK: Latency : 5 ; CHECK: Successors: -; CHECK: data +; CHECK: Data ; > VMULS read-advanced latency to VMLAS = 0 ; CHECK-SAME: Latency=0 @@ -18,14 +18,14 @@ ; > VMLAS common latency = 9 ; CHECK: Latency : 9 ; CHECK: Successors: -; CHECK: data +; CHECK: Data ; > VMLAS read-advanced latency to the next VMLAS = 4 ; CHECK-SAME: Latency=4 ; CHECK: VMLAS ; CHECK: Latency : 9 ; CHECK: Successors: -; CHECK: data +; CHECK: Data ; > VMLAS not-optimized latency to VMOVRS = 9 ; CHECK-SAME: Latency=9 @@ -47,7 +47,7 @@ ; > VMULfd common latency = 5 ; CHECK: Latency : 5 ; CHECK: Successors: -; CHECK: data +; CHECK: Data ; VMULfd read-advanced latency to VMLAfd = 0 ; CHECK-SAME: Latency=0 @@ -55,14 +55,14 @@ ; > VMLAfd common latency = 9 ; CHECK: Latency : 9 ; CHECK: Successors: -; CHECK: data +; CHECK: Data ; > VMLAfd read-advanced latency to the next VMLAfd = 4 ; CHECK-SAME: Latency=4 ; CHECK: VMLAfd ; CHECK: Latency : 9 ; CHECK: Successors: -; CHECK: data +; CHECK: Data ; > VMLAfd not-optimized latency to VMOVRRD = 9 ; CHECK-SAME: Latency=9 Index: llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll @@ -13,15 +13,15 @@ ; CHECK: rdefs left ; CHECK-NEXT: Latency : 6 ; CHECK: Successors: -; CHECK: data +; CHECK: Data ; CHECK-SAME: Latency=1 -; CHECK-NEXT: data +; CHECK-NEXT: Data ; CHECK-SAME: Latency=1 -; CHECK-NEXT: data +; CHECK-NEXT: Data ; CHECK-SAME: Latency=5 -; CHECK-NEXT: data +; CHECK-NEXT: Data ; CHECK-SAME: Latency=5 -; CHECK-NEXT: data +; CHECK-NEXT: Data ; CHECK-SAME: Latency=6 define i32 @bar(i32* %iptr) minsize optsize { %1 = load double, double* @a, align 8 Index: llvm/test/CodeGen/ARM/cortex-a57-misched-vldm.ll 
=================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-vldm.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-vldm.ll @@ -8,11 +8,11 @@ ; CHECK: rdefs left ; CHECK-NEXT: Latency : 6 ; CHECK: Successors: -; CHECK: data +; CHECK: Data ; CHECK-SAME: Latency=5 -; CHECK-NEXT: data +; CHECK-NEXT: Data ; CHECK-SAME: Latency=5 -; CHECK-NEXT: data +; CHECK-NEXT: Data ; CHECK-SAME: Latency=6 define double @foo(double* %a) nounwind optsize { Index: llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll @@ -9,7 +9,7 @@ ; CHECK: rdefs left ; CHECK-NEXT: Latency : 4 ; CHECK: Successors: -; CHECK: data +; CHECK: Data ; CHECK-SAME: Latency=1 @a = global double 0.0, align 4