Index: llvm/include/llvm/CodeGen/TargetSchedule.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetSchedule.h
+++ llvm/include/llvm/CodeGen/TargetSchedule.h
@@ -168,8 +168,10 @@
   /// when the operand indices are already known. UseMI may be NULL for an
   /// unknown user.
+  /// If Cluster is non-null, it is reset to false on entry and, when the
+  /// read-advance lookup is reached, forwarded to getReadAdvanceCycles().
   unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
-                                 const MachineInstr *UseMI, unsigned UseOperIdx)
-    const;
+                                 const MachineInstr *UseMI, unsigned UseOperIdx,
+                                 bool *Cluster = nullptr) const;
 
   /// \brief Compute the instruction latency based on the available machine
   /// model.
Index: llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
===================================================================
--- llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -226,6 +226,23 @@
   Objects.clear();
 }
 
+// For every predecessor clustered with SU, add an artificial edge from SU to
+// each other successor of that predecessor which lacks a direct edge from SU,
+// so those users are ordered after SU. NOTE(review): no cycle check is done.
+static void addClusterChain(SUnit *SU) {
+  for (auto &Def : SU->Preds)
+    if (Def.isCluster())
+      for (auto &Use : Def.getSUnit()->Succs) {
+        SUnit *UseSU = Use.getSUnit();
+        if (UseSU == SU || UseSU->isPred(SU))
+          continue;
+        DEBUG(dbgs() << " Bind ";
+              SU->print(dbgs()); dbgs() << " - ";
+              UseSU->print(dbgs()); dbgs() << '\n';);
+        UseSU->addPred(SDep(SU, SDep::Artificial));
+      }
+}
+
 void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
   BB = bb;
 }
@@ -297,8 +314,9 @@
       // Adjust the dependence latency using operand def/use information,
       // then allow the target to perform its own adjustments.
       int UseOp = I->OpIdx;
-      MachineInstr *RegUse = nullptr;
+      MachineInstr *UseMI = nullptr;
       SDep Dep;
+      bool Cluster = false;
       if (UseOp < 0)
         Dep = SDep(SU, SDep::Artificial);
       else {
@@ -306,14 +324,18 @@
         // the scheduling region.
         SU->hasPhysRegDefs = true;
         Dep = SDep(SU, SDep::Data, *Alias);
-        RegUse = UseSU->getInstr();
+        UseMI = UseSU->getInstr();
       }
-      Dep.setLatency(
-          SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
-                                           UseOp));
-
+      Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+                                                      UseMI, UseOp, &Cluster));
       ST.adjustSchedDependency(SU, UseSU, Dep);
       UseSU->addPred(Dep);
+      if (Cluster) {
+        DEBUG(dbgs() << " Cluster ";
+              SU->print(dbgs(), this); dbgs() << " - ";
+              UseSU->print(dbgs(), this); dbgs() << '\n';);
+        UseSU->addPred(SDep(SU, SDep::Cluster));
+      }
     }
   }
 }
@@ -456,12 +478,20 @@
 
       if ((LaneMask & DefLaneMask).any()) {
         SUnit *UseSU = I->SU;
-        MachineInstr *Use = UseSU->getInstr();
+        MachineInstr *UseMI = UseSU->getInstr();
         SDep Dep(SU, SDep::Data, Reg);
-        Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
-                                                        I->OperandIndex));
+        bool Cluster = false;
+        Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx,
+                                                        UseMI, I->OperandIndex,
+                                                        &Cluster));
         ST.adjustSchedDependency(SU, UseSU, Dep);
         UseSU->addPred(Dep);
+        if (Cluster) {
+          DEBUG(dbgs() << " Cluster ";
+                SU->print(dbgs(), this); dbgs() << " - ";
+                UseSU->print(dbgs(), this); dbgs() << '\n';);
+          UseSU->addPred(SDep(SU, SDep::Cluster));
+        }
       }
 
       LaneMask &= ~KillLaneMask;
@@ -994,6 +1024,11 @@
     }
   }
 
+  // Bind any clusters; walk the SUnits vector so edge order is deterministic.
+  addClusterChain(&ExitSU);
+  for (SUnit &Unit : SUnits)
+    addClusterChain(&Unit);
+
   if (DbgMI)
     FirstDbgValue = DbgMI;
 
Index: llvm/lib/CodeGen/TargetSchedule.cpp
===================================================================
--- llvm/lib/CodeGen/TargetSchedule.cpp
+++ llvm/lib/CodeGen/TargetSchedule.cpp
@@ -186,7 +186,10 @@
 // Top-level API for clients that know the operand indices.
 unsigned TargetSchedModel::computeOperandLatency(
     const MachineInstr *DefMI, unsigned DefOperIdx,
-    const MachineInstr *UseMI, unsigned UseOperIdx) const {
+    const MachineInstr *UseMI, unsigned UseOperIdx, bool *Cluster) const {
+
+  if (Cluster != nullptr)
+    *Cluster = false;
 
   if (!hasInstrSchedModel() && !hasInstrItineraries())
     return TII->defaultDefLatency(SchedModel, *DefMI);
@@ -233,7 +236,7 @@
   if (UseDesc->NumReadAdvanceEntries == 0)
     return Latency;
   unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
-  int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+  int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID, Cluster);
   if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
     return 0;
   return Latency - Advance;