Index: llvm/include/llvm/CodeGen/MachineScheduler.h =================================================================== --- llvm/include/llvm/CodeGen/MachineScheduler.h +++ llvm/include/llvm/CodeGen/MachineScheduler.h @@ -101,6 +101,11 @@ extern cl::opt ForceTopDown; extern cl::opt ForceBottomUp; extern cl::opt VerifyScheduling; +#ifndef NDEBUG +extern cl::opt ViewMISchedDAGs; +#else +extern const bool ViewMISchedDAGs; +#endif class AAResults; class LiveIntervals; Index: llvm/include/llvm/CodeGen/VLIWMachineScheduler.h =================================================================== --- /dev/null +++ llvm/include/llvm/CodeGen/VLIWMachineScheduler.h @@ -0,0 +1,268 @@ +//===- VLIWMachineScheduler.h - VLIW-Focused Scheduling Pass ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// // +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_VLIWMACHINESCHEDULER_H +#define LLVM_CODEGEN_VLIWMACHINESCHEDULER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include +#include +#include + +namespace llvm { + +class DFAPacketizer; +class RegisterClassInfo; +class ScheduleHazardRecognizer; +class SUnit; +class TargetInstrInfo; +class TargetSubtargetInfo; + +class VLIWResourceModel { +protected: + const TargetInstrInfo *TII; + + /// ResourcesModel - Represents VLIW state. + /// Not limited to VLIW targets per se, but assumes definition of resource + /// model by a target. + DFAPacketizer *ResourcesModel; + + const TargetSchedModel *SchedModel; + + /// Local packet/bundle model. Purely + /// internal to the MI scheduler at the time. + SmallVector Packet; + + /// Total packets created. + unsigned TotalPackets = 0; + +public: + VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM); + + virtual ~VLIWResourceModel(); + + virtual void reset(); + + virtual bool hasDependence(const SUnit *SUd, const SUnit *SUu); + virtual bool isResourceAvailable(SUnit *SU, bool IsTop); + virtual bool reserveResources(SUnit *SU, bool IsTop); + unsigned getTotalPackets() const { return TotalPackets; } + size_t getPacketInstCount() const { return Packet.size(); } + bool isInPacket(SUnit *SU) const { return is_contained(Packet, SU); } + +protected: + virtual DFAPacketizer *createPacketizer(const TargetSubtargetInfo &STI) const; +}; + +/// Extend the standard ScheduleDAGMILive to provide more context and override +/// the top-level schedule() driver. +class VLIWMachineScheduler : public ScheduleDAGMILive { +public: + VLIWMachineScheduler(MachineSchedContext *C, + std::unique_ptr S) + : ScheduleDAGMILive(C, std::move(S)) {} + + /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's + /// time to do some work. + void schedule() override; + + RegisterClassInfo *getRegClassInfo() { return RegClassInfo; } + int getBBSize() { return BB->size(); } +}; + +//===----------------------------------------------------------------------===// +// ConvergingVLIWScheduler - Implementation of a VLIW-aware +// MachineSchedStrategy. +//===----------------------------------------------------------------------===// + +class ConvergingVLIWScheduler : public MachineSchedStrategy { +protected: + /// Store the state used by ConvergingVLIWScheduler heuristics, required + /// for the lifetime of one invocation of pickNode(). + struct SchedCandidate { + // The best SUnit candidate. + SUnit *SU = nullptr; + + // Register pressure values for the best candidate. + RegPressureDelta RPDelta; + + // Best scheduling cost. + int SCost = 0; + + SchedCandidate() = default; + }; + /// Represent the type of SchedCandidate found within a single queue. + enum CandResult { + NoCand, + NodeOrder, + SingleExcess, + SingleCritical, + SingleMax, + MultiPressure, + BestCost, + Weak + }; + + // Constants used to denote relative importance of + // heuristic components for cost computation. + static constexpr unsigned PriorityOne = 200; + static constexpr unsigned PriorityTwo = 50; + static constexpr unsigned PriorityThree = 75; + static constexpr unsigned ScaleTwo = 10; + + /// Each Scheduling boundary is associated with ready queues. It tracks the + /// current cycle in whichever direction at has moved, and maintains the state + /// of "hazards" and other interlocks at the current cycle. + struct VLIWSchedBoundary { + VLIWMachineScheduler *DAG = nullptr; + const TargetSchedModel *SchedModel = nullptr; + + ReadyQueue Available; + ReadyQueue Pending; + bool CheckPending = false; + + ScheduleHazardRecognizer *HazardRec = nullptr; + VLIWResourceModel *ResourceModel = nullptr; + + unsigned CurrCycle = 0; + unsigned IssueCount = 0; + unsigned CriticalPathLength = 0; + + /// MinReadyCycle - Cycle of the soonest available instruction. + unsigned MinReadyCycle = std::numeric_limits::max(); + + // Remember the greatest min operand latency. + unsigned MaxMinLatency = 0; + + /// Pending queues extend the ready queues with the same ID and the + /// PendingFlag set. + VLIWSchedBoundary(unsigned ID, const Twine &Name) + : Available(ID, Name + ".A"), + Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name + ".P") {} + + ~VLIWSchedBoundary(); + + void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) { + DAG = dag; + SchedModel = smodel; + CurrCycle = 0; + IssueCount = 0; + // Initialize the critical path length limit, which used by the scheduling + // cost model to determine the value for scheduling an instruction. We use + // a slightly different heuristic for small and large functions. For small + // functions, it's important to use the height/depth of the instruction. + // For large functions, prioritizing by height or depth increases spills. + CriticalPathLength = DAG->getBBSize() / SchedModel->getIssueWidth(); + if (DAG->getBBSize() < 50) + // We divide by two as a cheap and simple heuristic to reduce the + // critcal path length, which increases the priority of using the graph + // height/depth in the scheduler's cost computation. + CriticalPathLength >>= 1; + else { + // For large basic blocks, we prefer a larger critical path length to + // decrease the priority of using the graph height/depth. + unsigned MaxPath = 0; + for (auto &SU : DAG->SUnits) + MaxPath = std::max(MaxPath, isTop() ? SU.getHeight() : SU.getDepth()); + CriticalPathLength = std::max(CriticalPathLength, MaxPath) + 1; + } + } + + bool isTop() const { + return Available.getID() == ConvergingVLIWScheduler::TopQID; + } + + bool checkHazard(SUnit *SU); + + void releaseNode(SUnit *SU, unsigned ReadyCycle); + + void bumpCycle(); + + void bumpNode(SUnit *SU); + + void releasePending(); + + void removeReady(SUnit *SU); + + SUnit *pickOnlyChoice(); + + bool isLatencyBound(SUnit *SU) { + if (CurrCycle >= CriticalPathLength) + return true; + unsigned PathLength = isTop() ? SU->getHeight() : SU->getDepth(); + return CriticalPathLength - CurrCycle <= PathLength; + } + }; + + VLIWMachineScheduler *DAG = nullptr; + const TargetSchedModel *SchedModel = nullptr; + + // State of the top and bottom scheduled instruction boundaries. + VLIWSchedBoundary Top; + VLIWSchedBoundary Bot; + + /// List of pressure sets that have a high pressure level in the region. + SmallVector HighPressureSets; + +public: + /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) + enum { TopQID = 1, BotQID = 2, LogMaxQID = 2 }; + + ConvergingVLIWScheduler() : Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + virtual ~ConvergingVLIWScheduler() = default; + + void initialize(ScheduleDAGMI *dag) override; + + SUnit *pickNode(bool &IsTopNode) override; + + void schedNode(SUnit *SU, bool IsTopNode) override; + + void releaseTopNode(SUnit *SU) override; + + void releaseBottomNode(SUnit *SU) override; + + unsigned reportPackets() { + return Top.ResourceModel->getTotalPackets() + + Bot.ResourceModel->getTotalPackets(); + } + +protected: + virtual VLIWResourceModel * + createVLIWResourceModel(const TargetSubtargetInfo &STI, + const TargetSchedModel *SchedModel) const; + + SUnit *pickNodeBidrectional(bool &IsTopNode); + + int pressureChange(const SUnit *SU, bool isBotUp); + + virtual int SchedulingCost(ReadyQueue &Q, SUnit *SU, + SchedCandidate &Candidate, RegPressureDelta &Delta, + bool verbose); + + CandResult pickNodeFromQueue(VLIWSchedBoundary &Zone, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate); +#ifndef NDEBUG + void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, + int Cost, PressureChange P = PressureChange()); + + void readyQueueVerboseDump(const RegPressureTracker &RPTracker, + SchedCandidate &Candidate, ReadyQueue &Q); +#endif +}; + +ScheduleDAGMILive *createVLIWSched(MachineSchedContext *C); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_VLIWMACHINESCHEDULER_H Index: llvm/lib/CodeGen/CMakeLists.txt =================================================================== --- llvm/lib/CodeGen/CMakeLists.txt +++ llvm/lib/CodeGen/CMakeLists.txt @@ -189,6 +189,7 @@ TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp ValueTypes.cpp + VLIWMachineScheduler.cpp VirtRegMap.cpp WasmEHPrepare.cpp WinEHPrepare.cpp Index: llvm/lib/CodeGen/MachineScheduler.cpp =================================================================== --- llvm/lib/CodeGen/MachineScheduler.cpp +++ llvm/lib/CodeGen/MachineScheduler.cpp @@ -90,12 +90,17 @@ "verify-misched", cl::Hidden, cl::desc("Verify machine instrs before and after machine scheduling")); +#ifndef NDEBUG +cl::opt ViewMISchedDAGs( + "view-misched-dags", cl::Hidden, + cl::desc("Pop up a window to show MISched dags after they are processed")); +#else +const bool ViewMISchedDAGs = false; +#endif // NDEBUG + } // end namespace llvm #ifndef NDEBUG -static cl::opt ViewMISchedDAGs("view-misched-dags", cl::Hidden, - cl::desc("Pop up a window to show MISched dags after they are processed")); - /// In some situations a few uninteresting nodes depend on nearly all other /// nodes in the graph, provide a cutoff to hide them. static cl::opt ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, @@ -111,7 +116,6 @@ static cl::opt PrintDAGs("misched-print-dags", cl::Hidden, cl::desc("Print schedule DAGs")); #else -static const bool ViewMISchedDAGs = false; static const bool PrintDAGs = false; #endif // NDEBUG Index: llvm/lib/CodeGen/VLIWMachineScheduler.cpp =================================================================== --- /dev/null +++ llvm/lib/CodeGen/VLIWMachineScheduler.cpp @@ -0,0 +1,1005 @@ +//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// MachineScheduler schedules machine instructions after phi elimination. It +// preserves LiveIntervals so it can be invoked before register allocation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/VLIWMachineScheduler.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "machine-scheduler" + +static cl::opt IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden, + cl::ZeroOrMore, cl::init(false)); + +static cl::opt UseNewerCandidate("use-newer-candidate", cl::Hidden, + cl::ZeroOrMore, cl::init(true)); + +static cl::opt SchedDebugVerboseLevel("misched-verbose-level", + cl::Hidden, cl::ZeroOrMore, + cl::init(1)); + +// Check if the scheduler should penalize instructions that are available to +// early due to a zero-latency dependence. +static cl::opt CheckEarlyAvail("check-early-avail", cl::Hidden, + cl::ZeroOrMore, cl::init(true)); + +// This value is used to determine if a register class is a high pressure set. +// We compute the maximum number of registers needed and divided by the total +// available. Then, we compare the result to this value. +static cl::opt RPThreshold("vliw-misched-reg-pressure", cl::Hidden, + cl::init(0.75f), + cl::desc("High register pressure threhold.")); + +VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI, + const TargetSchedModel *SM) + : TII(STI.getInstrInfo()), SchedModel(SM) { + ResourcesModel = createPacketizer(STI); + + // This hard requirement could be relaxed, + // but for now do not let it proceed. + assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + Packet.reserve(SchedModel->getIssueWidth()); + Packet.clear(); + ResourcesModel->clearResources(); +} + +void VLIWResourceModel::reset() { + Packet.clear(); + ResourcesModel->clearResources(); +} + +VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; } + +/// Return true if there is a dependence between SUd and SUu. +bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) { + if (SUd->Succs.size() == 0) + return false; + + for (const auto &S : SUd->Succs) { + // Since we do not add pseudos to packets, might as well + // ignore order dependencies. + if (S.isCtrl()) + continue; + + if (S.getSUnit() == SUu && S.getLatency() > 0) + return true; + } + return false; +} + +/// Check if scheduling of this SU is possible +/// in the current packet. +/// It is _not_ precise (statefull), it is more like +/// another heuristic. Many corner cases are figured +/// empirically. +bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) { + if (!SU || !SU->getInstr()) + return false; + + // First see if the pipeline could receive this instruction + // in the current cycle. + switch (SU->getInstr()->getOpcode()) { + default: + if (!ResourcesModel->canReserveResources(*SU->getInstr())) + return false; + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + case TargetOpcode::INLINEASM_BR: + break; + } + + // Now see if there are no other dependencies to instructions already + // in the packet. + if (IsTop) { + for (unsigned i = 0, e = Packet.size(); i != e; ++i) + if (hasDependence(Packet[i], SU)) + return false; + } else { + for (unsigned i = 0, e = Packet.size(); i != e; ++i) + if (hasDependence(SU, Packet[i])) + return false; + } + return true; +} + +/// Keep track of available resources. +bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) { + bool startNewCycle = false; + // Artificially reset state. + if (!SU) { + reset(); + TotalPackets++; + return false; + } + // If this SU does not fit in the packet or the packet is now full + // start a new one. + if (!isResourceAvailable(SU, IsTop) || + Packet.size() >= SchedModel->getIssueWidth()) { + reset(); + TotalPackets++; + startNewCycle = true; + } + + switch (SU->getInstr()->getOpcode()) { + default: + ResourcesModel->reserveResources(*SU->getInstr()); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::CFI_INSTRUCTION: + case TargetOpcode::EH_LABEL: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + case TargetOpcode::INLINEASM_BR: + break; + } + Packet.push_back(SU); + +#ifndef NDEBUG + LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); + for (unsigned i = 0, e = Packet.size(); i != e; ++i) { + LLVM_DEBUG(dbgs() << "\t[" << i << "] SU("); + LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); + LLVM_DEBUG(Packet[i]->getInstr()->dump()); + } +#endif + + return startNewCycle; +} + +DFAPacketizer * +VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const { + return STI.getInstrInfo()->CreateTargetScheduleState(STI); +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. [RegionBegin, RegionEnd) +/// only includes instructions that have DAG nodes, not scheduling boundaries. +void VLIWMachineScheduler::schedule() { + LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW " + << printMBBReference(*BB) << " " << BB->getName() + << " in_func " << BB->getParent()->getName() + << " at loop depth " << MLI->getLoopDepth(BB) << " \n"); + + buildDAGWithRegPressure(); + + Topo.InitDAGTopologicalSorting(); + + // Postprocess the DAG to add platform-specific artificial dependencies. + postprocessDAG(); + + SmallVector TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. + SchedImpl->initialize(this); + + LLVM_DEBUG(unsigned maxH = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; + ++su) if (SUnits[su].getHeight() > maxH) maxH = + SUnits[su].getHeight(); + dbgs() << "Max Height " << maxH << "\n";); + LLVM_DEBUG(unsigned maxD = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; + ++su) if (SUnits[su].getDepth() > maxD) maxD = + SUnits[su].getDepth(); + dbgs() << "Max Depth " << maxD << "\n";); + LLVM_DEBUG(dump()); + if (ViewMISchedDAGs) + viewGraph(); + + initQueues(TopRoots, BotRoots); + + bool IsTopNode = false; + while (true) { + LLVM_DEBUG( + dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) + break; + + if (!checkSchedLimit()) + break; + + scheduleMI(SU, IsTopNode); + + // Notify the scheduling strategy after updating the DAG. + SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); + + LLVM_DEBUG({ + dbgs() << "*** Final schedule for " + << printMBBReference(*begin()->getParent()) << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); +} + +void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { + DAG = static_cast(dag); + SchedModel = DAG->getSchedModel(); + + Top.init(DAG, SchedModel); + Bot.init(DAG, SchedModel); + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); + const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + delete Top.HazardRec; + delete Bot.HazardRec; + Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); + Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); + + delete Top.ResourceModel; + delete Bot.ResourceModel; + Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel()); + Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel()); + + const std::vector &MaxPressure = + DAG->getRegPressure().MaxSetPressure; + HighPressureSets.assign(MaxPressure.size(), 0); + for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) { + unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i); + HighPressureSets[i] = + ((float)MaxPressure[i] > ((float)Limit * RPThreshold)); + } + + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); +} + +VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel( + const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const { + return new VLIWResourceModel(STI, SchedModel); +} + +void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { + for (const SDep &PI : SU->Preds) { + unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; + unsigned MinLatency = PI.getLatency(); +#ifndef NDEBUG + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); +#endif + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; + } + + if (!SU->isScheduled) + Top.releaseNode(SU, SU->TopReadyCycle); +} + +void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; + ++I) { + unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; + unsigned MinLatency = I->getLatency(); +#ifndef NDEBUG + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); +#endif + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; + } + + if (!SU->isScheduled) + Bot.releaseNode(SU, SU->BotReadyCycle); +} + +ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() { + delete ResourceModel; + delete HazardRec; +} + +/// Does this SU have a hazard within the current instruction group. +/// +/// The scheduler supports two modes of hazard recognition. The first is the +/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that +/// supports highly complicated in-order reservation tables +/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. +/// +/// The second is a streamlined mechanism that checks for hazards based on +/// simple counters that the scheduler itself maintains. It explicitly checks +/// for instruction dispatch limitations, including the number of micro-ops that +/// can dispatch per cycle. +/// +/// TODO: Also check whether the SU must start a new group. +bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled()) + return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; + + unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); + if (IssueCount + uops > SchedModel->getIssueWidth()) + return true; + + return false; +} + +void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode( + SUnit *SU, unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + // Check for interlocks first. For the purpose of other heuristics, an + // instruction that cannot issue appears as if it's not in the ReadyQueue. + if (ReadyCycle > CurrCycle || checkHazard(SU)) + + Pending.push(SU); + else + Available.push(SU); +} + +/// Move the boundary of scheduled code by one cycle. +void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { + unsigned Width = SchedModel->getIssueWidth(); + IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; + + assert(MinReadyCycle < std::numeric_limits::max() && + "MinReadyCycle uninitialized"); + unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); + + if (!HazardRec->isEnabled()) { + // Bypass HazardRec virtual calls. + CurrCycle = NextCycle; + } else { + // Bypass getHazardType calls in case of long latency. + for (; CurrCycle != NextCycle; ++CurrCycle) { + if (isTop()) + HazardRec->AdvanceCycle(); + else + HazardRec->RecedeCycle(); + } + } + CheckPending = true; + + LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " + << CurrCycle << '\n'); +} + +/// Move the boundary of scheduled code by one SUnit. +void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { + bool startNewCycle = false; + + // Update the reservation table. + if (HazardRec->isEnabled()) { + if (!isTop() && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + HazardRec->EmitInstruction(SU); + } + + // Update DFA model. + startNewCycle = ResourceModel->reserveResources(SU, isTop()); + + // Check the instruction group dispatch limit. + // TODO: Check if this SU must end a dispatch group. + IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); + if (startNewCycle) { + LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); + bumpCycle(); + } else + LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle " + << CurrCycle << '\n'); +} + +/// Release pending ready nodes in to the available queue. This makes them +/// visible to heuristics. +void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { + // If the available queue is empty, it is safe to reset MinReadyCycle. + if (Available.empty()) + MinReadyCycle = std::numeric_limits::max(); + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = Pending.size(); i != e; ++i) { + SUnit *SU = *(Pending.begin() + i); + unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; + + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + if (ReadyCycle > CurrCycle) + continue; + + if (checkHazard(SU)) + continue; + + Available.push(SU); + Pending.remove(Pending.begin() + i); + --i; + --e; + } + CheckPending = false; +} + +/// Remove SU from the ready set for this boundary. +void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { + if (Available.isInQueue(SU)) + Available.remove(Available.find(SU)); + else { + assert(Pending.isInQueue(SU) && "bad ready count"); + Pending.remove(Pending.find(SU)); + } +} + +/// If this queue only has one ready candidate, return it. As a side effect, +/// advance the cycle until at least one node is ready. If multiple instructions +/// are ready, return NULL. +SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { + if (CheckPending) + releasePending(); + + auto AdvanceCycle = [this]() { + if (Available.empty()) + return true; + if (Available.size() == 1 && Pending.size() > 0) + return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) || + getWeakLeft(*Available.begin(), isTop()) != 0; + return false; + }; + for (unsigned i = 0; AdvanceCycle(); ++i) { + assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && + "permanent hazard"); + (void)i; + ResourceModel->reserveResources(nullptr, isTop()); + bumpCycle(); + releasePending(); + } + if (Available.size() == 1) + return *Available.begin(); + return nullptr; +} + +#ifndef NDEBUG +void ConvergingVLIWScheduler::traceCandidate(const char *Label, + const ReadyQueue &Q, SUnit *SU, + int Cost, PressureChange P) { + dbgs() << Label << " " << Q.getName() << " "; + if (P.isValid()) + dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" + << P.getUnitInc() << " "; + else + dbgs() << " "; + dbgs() << "cost(" << Cost << ")\t"; + DAG->dumpNode(*SU); +} + +// Very detailed queue dump, to be used with higher verbosity levels. +void ConvergingVLIWScheduler::readyQueueVerboseDump( + const RegPressureTracker &RPTracker, SchedCandidate &Candidate, + ReadyQueue &Q) { + RegPressureTracker &TempTracker = const_cast(RPTracker); + + dbgs() << ">>> " << Q.getName() << "\n"; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + std::stringstream dbgstr; + dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")"; + dbgs() << dbgstr.str(); + SchedulingCost(Q, *I, Candidate, RPDelta, true); + dbgs() << "\t"; + (*I)->getInstr()->dump(); + } + dbgs() << "\n"; +} +#endif + +/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor +/// of SU, return true (we may have duplicates) +static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { + if (SU->NumPredsLeft == 0) + return false; + + for (auto &Pred : SU->Preds) { + // We found an available, but not scheduled, predecessor. + if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) + return false; + } + + return true; +} + +/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor +/// of SU, return true (we may have duplicates) +static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { + if (SU->NumSuccsLeft == 0) + return false; + + for (auto &Succ : SU->Succs) { + // We found an available, but not scheduled, successor. + if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) + return false; + } + return true; +} + +/// Check if the instruction changes the register pressure of a register in the +/// high pressure set. The function returns a negative value if the pressure +/// decreases and a positive value is the pressure increases. If the instruction +/// doesn't use a high pressure register or doesn't change the register +/// pressure, then return 0. +int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { + PressureDiff &PD = DAG->getPressureDiff(SU); + for (auto &P : PD) { + if (!P.isValid()) + continue; + // The pressure differences are computed bottom-up, so the comparision for + // an increase is positive in the bottom direction, but negative in the + // top-down direction. + if (HighPressureSets[P.getPSet()]) + return (isBotUp ? P.getUnitInc() : -P.getUnitInc()); + } + return 0; +} + +/// Single point to compute overall scheduling cost. +/// TODO: More heuristics will be used soon. +int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, + SchedCandidate &Candidate, + RegPressureDelta &Delta, + bool verbose) { + // Initial trivial priority. + int ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (!SU || SU->isScheduled) + return ResCount; + + LLVM_DEBUG(if (verbose) dbgs() + << ((Q.getID() == TopQID) ? "(top|" : "(bot|")); + // Forced priority is high. + if (SU->isScheduleHigh) { + ResCount += PriorityOne; + LLVM_DEBUG(dbgs() << "H|"); + } + + unsigned IsAvailableAmt = 0; + // Critical path first. + if (Q.getID() == TopQID) { + if (Top.isLatencyBound(SU)) { + LLVM_DEBUG(if (verbose) dbgs() << "LB|"); + ResCount += (SU->getHeight() * ScaleTwo); + } + + LLVM_DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "h" << std::setw(3) << SU->getHeight() << "|"; + dbgs() << dbgstr.str(); + }); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Top.ResourceModel->isResourceAvailable(SU, true)) { + IsAvailableAmt = (PriorityTwo + PriorityThree); + ResCount += IsAvailableAmt; + LLVM_DEBUG(if (verbose) dbgs() << "A|"); + } else + LLVM_DEBUG(if (verbose) dbgs() << " |"); + } else { + if (Bot.isLatencyBound(SU)) { + LLVM_DEBUG(if (verbose) dbgs() << "LB|"); + ResCount += (SU->getDepth() * ScaleTwo); + } + + LLVM_DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "d" << std::setw(3) << SU->getDepth() << "|"; + dbgs() << dbgstr.str(); + }); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Bot.ResourceModel->isResourceAvailable(SU, false)) { + IsAvailableAmt = (PriorityTwo + PriorityThree); + ResCount += IsAvailableAmt; + LLVM_DEBUG(if (verbose) dbgs() << "A|"); + } else + LLVM_DEBUG(if (verbose) dbgs() << " |"); + } + + unsigned NumNodesBlocking = 0; + if (Q.getID() == TopQID) { + // How many SUs does it block from scheduling? + // Look at all of the successors of this node. + // Count the number of nodes that + // this node is the sole unscheduled node for. + if (Top.isLatencyBound(SU)) + for (const SDep &SI : SU->Succs) + if (isSingleUnscheduledPred(SI.getSUnit(), SU)) + ++NumNodesBlocking; + } else { + // How many unscheduled predecessors block this node? + if (Bot.isLatencyBound(SU)) + for (const SDep &PI : SU->Preds) + if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) + ++NumNodesBlocking; + } + ResCount += (NumNodesBlocking * ScaleTwo); + + LLVM_DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|"; + dbgs() << dbgstr.str(); + }); + + // Factor in reg pressure as a heuristic. + if (!IgnoreBBRegPressure) { + // Decrease priority by the amount that register pressure exceeds the limit. + ResCount -= (Delta.Excess.getUnitInc() * PriorityOne); + // Decrease priority if register pressure exceeds the limit. + ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne); + // Decrease priority slightly if register pressure would increase over the + // current maximum. + ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo); + // If there are register pressure issues, then we remove the value added for + // the instruction being available. The rationale is that we really don't + // want to schedule an instruction that causes a spill. + if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 && + (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() || + Delta.CurrentMax.getUnitInc())) + ResCount -= IsAvailableAmt; + LLVM_DEBUG(if (verbose) { + dbgs() << "RP " << Delta.Excess.getUnitInc() << "/" + << Delta.CriticalMax.getUnitInc() << "/" + << Delta.CurrentMax.getUnitInc() << ")|"; + }); + } + + // Give preference to a zero latency instruction if the dependent + // instruction is in the current packet. + if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) { + for (const SDep &PI : SU->Preds) { + if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() && + PI.getLatency() == 0 && + Top.ResourceModel->isInPacket(PI.getSUnit())) { + ResCount += PriorityThree; + LLVM_DEBUG(if (verbose) dbgs() << "Z|"); + } + } + } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) { + for (const SDep &SI : SU->Succs) { + if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() && + SI.getLatency() == 0 && + Bot.ResourceModel->isInPacket(SI.getSUnit())) { + ResCount += PriorityThree; + LLVM_DEBUG(if (verbose) dbgs() << "Z|"); + } + } + } + + // If the instruction has a non-zero latency dependence with an instruction in + // the current packet, then it should not be scheduled yet. The case occurs + // when the dependent instruction is scheduled in a new packet, so the + // scheduler updates the current cycle and pending instructions become + // available. + if (CheckEarlyAvail) { + if (Q.getID() == TopQID) { + for (const auto &PI : SU->Preds) { + if (PI.getLatency() > 0 && + Top.ResourceModel->isInPacket(PI.getSUnit())) { + ResCount -= PriorityOne; + LLVM_DEBUG(if (verbose) dbgs() << "D|"); + } + } + } else { + for (const auto &SI : SU->Succs) { + if (SI.getLatency() > 0 && + Bot.ResourceModel->isInPacket(SI.getSUnit())) { + ResCount -= PriorityOne; + LLVM_DEBUG(if (verbose) dbgs() << "D|"); + } + } + } + } + + LLVM_DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "Total " << std::setw(4) << ResCount << ")"; + dbgs() << dbgstr.str(); + }); + + return ResCount; +} + +/// Pick the best candidate from the top queue. +/// +/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during +/// DAG building. To adjust for the current scheduling location we need to +/// maintain the number of vreg uses remaining to be top-scheduled. +ConvergingVLIWScheduler::CandResult +ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate) { + ReadyQueue &Q = Zone.Available; + LLVM_DEBUG(if (SchedDebugVerboseLevel > 1) + readyQueueVerboseDump(RPTracker, Candidate, Q); + else Q.dump();); + + // getMaxPressureDelta temporarily modifies the tracker. + RegPressureTracker &TempTracker = const_cast(RPTracker); + + // BestSU remains NULL if no top candidates beat the best existing candidate. + CandResult FoundCandidate = NoCand; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + + int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); + + // Initialize the candidate if needed. + if (!Candidate.SU) { + LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + continue; + } + + // Choose node order for negative cost candidates. There is no good + // candidate in this case. + if (CurrentCost < 0 && Candidate.SCost < 0) { + if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || + (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { + LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + } + continue; + } + + // Best cost. + if (CurrentCost > Candidate.SCost) { + LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + continue; + } + + // Choose an instruction that does not depend on an artificial edge. + unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID)); + unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID)); + if (CurrWeak != CandWeak) { + if (CurrWeak < CandWeak) { + LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = Weak; + } + continue; + } + + if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) { + unsigned CurrSize, CandSize; + if (Q.getID() == TopQID) { + CurrSize = (*I)->Succs.size(); + CandSize = Candidate.SU->Succs.size(); + } else { + CurrSize = (*I)->Preds.size(); + CandSize = Candidate.SU->Preds.size(); + } + if (CurrSize > CandSize) { + LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + } + // Keep the old candidate if it's a better candidate. That is, don't use + // the subsequent tie breaker. + if (CurrSize != CandSize) + continue; + } + + // Tie breaker. + // To avoid scheduling indeterminism, we need a tie breaker + // for the case when cost is identical for two nodes. + if (UseNewerCandidate && CurrentCost == Candidate.SCost) { + if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || + (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { + LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + continue; + } + } + + // Fall through to original instruction order. + // Only consider node order if Candidate was chosen from this Q. + if (FoundCandidate == NoCand) + continue; + } + return FoundCandidate; +} + +/// Pick the best candidate node from either the top or bottom queue. +SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + LLVM_DEBUG(dbgs() << "Picked only Bottom\n"); + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + LLVM_DEBUG(dbgs() << "Picked only Top\n"); + IsTopNode = true; + return SU; + } + SchedCandidate BotCand; + // Prefer bottom scheduling when heuristics are silent. + CandResult BotResult = + pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + + // If either Q has a single candidate that provides the least increase in + // Excess pressure, we can immediately schedule from that Q. + // + // RegionCriticalPSets summarizes the pressure within the scheduled region and + // affects picking from either Q. If scheduling in one direction must + // increase pressure for one of the excess PSets, then schedule in that + // direction first to provide more freedom in the other direction. + if (BotResult == SingleExcess || BotResult == SingleCritical) { + LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n"); + IsTopNode = false; + return BotCand.SU; + } + // Check if the top Q has a better candidate. + SchedCandidate TopCand; + CandResult TopResult = + pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + + if (TopResult == SingleExcess || TopResult == SingleCritical) { + LLVM_DEBUG(dbgs() << "Prefered Top Node\n"); + IsTopNode = true; + return TopCand.SU; + } + // If either Q has a single candidate that minimizes pressure above the + // original region's pressure pick it. + if (BotResult == SingleMax) { + LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n"); + IsTopNode = false; + return BotCand.SU; + } + if (TopResult == SingleMax) { + LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n"); + IsTopNode = true; + return TopCand.SU; + } + if (TopCand.SCost > BotCand.SCost) { + LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n"); + IsTopNode = true; + return TopCand.SU; + } + // Otherwise prefer the bottom candidate in node order. + LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n"); + IsTopNode = false; + return BotCand.SU; +} + +/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. +SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && + Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); + return nullptr; + } + SUnit *SU; + if (ForceTopDown) { + SU = Top.pickOnlyChoice(); + if (!SU) { + SchedCandidate TopCand; + CandResult TopResult = + pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + (void)TopResult; + SU = TopCand.SU; + } + IsTopNode = true; + } else if (ForceBottomUp) { + SU = Bot.pickOnlyChoice(); + if (!SU) { + SchedCandidate BotCand; + CandResult BotResult = + pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + (void)BotResult; + SU = BotCand.SU; + } + IsTopNode = false; + } else { + SU = pickNodeBidrectional(IsTopNode); + } + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + + LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling instruction in cycle " + << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " (" + << reportPackets() << ")\n"; + DAG->dumpNode(*SU)); + return SU; +} + +/// Update the scheduler's state after scheduling a node. This is the same node +/// that was just returned by pickNode(). However, VLIWMachineScheduler needs +/// to update it's state based on the current cycle before MachineSchedStrategy +/// does. +void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { + if (IsTopNode) { + Top.bumpNode(SU); + SU->TopReadyCycle = Top.CurrCycle; + } else { + Bot.bumpNode(SU); + SU->BotReadyCycle = Bot.CurrCycle; + } +} Index: llvm/lib/Target/Hexagon/HexagonMachineScheduler.h =================================================================== --- llvm/lib/Target/Hexagon/HexagonMachineScheduler.h +++ llvm/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -13,261 +13,28 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/RegisterPressure.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include -#include -#include -#include -#include +#include "llvm/CodeGen/VLIWMachineScheduler.h" namespace llvm { class SUnit; -class VLIWResourceModel { - /// ResourcesModel - Represents VLIW state. - /// Not limited to VLIW targets per se, but assumes - /// definition of DFA by a target. - DFAPacketizer *ResourcesModel; - - const TargetSchedModel *SchedModel; - - /// Local packet/bundle model. Purely - /// internal to the MI schedulre at the time. - std::vector Packet; - - /// Total packets created. - unsigned TotalPackets = 0; - +class HexagonVLIWResourceModel : public VLIWResourceModel { public: - VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM) - : SchedModel(SM) { - ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI); - - // This hard requirement could be relaxed, - // but for now do not let it proceed. - assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); - - Packet.resize(SchedModel->getIssueWidth()); - Packet.clear(); - ResourcesModel->clearResources(); - } - - ~VLIWResourceModel() { - delete ResourcesModel; - } - - void resetPacketState() { - Packet.clear(); - } - - void resetDFA() { - ResourcesModel->clearResources(); - } - - void reset() { - Packet.clear(); - ResourcesModel->clearResources(); - } - - bool isResourceAvailable(SUnit *SU, bool IsTop); - bool reserveResources(SUnit *SU, bool IsTop); - unsigned getTotalPackets() const { return TotalPackets; } - bool isInPacket(SUnit *SU) const { return is_contained(Packet, SU); } + using VLIWResourceModel::VLIWResourceModel; + bool hasDependence(const SUnit *SUd, const SUnit *SUu) override; }; -/// Extend the standard ScheduleDAGMI to provide more context and override the -/// top-level schedule() driver. -class VLIWMachineScheduler : public ScheduleDAGMILive { -public: - VLIWMachineScheduler(MachineSchedContext *C, - std::unique_ptr S) - : ScheduleDAGMILive(C, std::move(S)) {} - - /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's - /// time to do some work. - void schedule() override; - - RegisterClassInfo *getRegClassInfo() { return RegClassInfo; } - int getBBSize() { return BB->size(); } -}; - -//===----------------------------------------------------------------------===// -// ConvergingVLIWScheduler - Implementation of the standard -// MachineSchedStrategy. -//===----------------------------------------------------------------------===// - -/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics -/// to balance the schedule. -class ConvergingVLIWScheduler : public MachineSchedStrategy { - /// Store the state used by ConvergingVLIWScheduler heuristics, required - /// for the lifetime of one invocation of pickNode(). - struct SchedCandidate { - // The best SUnit candidate. - SUnit *SU = nullptr; - - // Register pressure values for the best candidate. - RegPressureDelta RPDelta; - - // Best scheduling cost. - int SCost = 0; - - SchedCandidate() = default; - }; - /// Represent the type of SchedCandidate found within a single queue. - enum CandResult { - NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure, - BestCost, Weak}; - - /// Each Scheduling boundary is associated with ready queues. It tracks the - /// current cycle in whichever direction at has moved, and maintains the state - /// of "hazards" and other interlocks at the current cycle. - struct VLIWSchedBoundary { - VLIWMachineScheduler *DAG = nullptr; - const TargetSchedModel *SchedModel = nullptr; - - ReadyQueue Available; - ReadyQueue Pending; - bool CheckPending = false; - - ScheduleHazardRecognizer *HazardRec = nullptr; - VLIWResourceModel *ResourceModel = nullptr; - - unsigned CurrCycle = 0; - unsigned IssueCount = 0; - unsigned CriticalPathLength = 0; - - /// MinReadyCycle - Cycle of the soonest available instruction. - unsigned MinReadyCycle = std::numeric_limits::max(); - - // Remember the greatest min operand latency. - unsigned MaxMinLatency = 0; - - /// Pending queues extend the ready queues with the same ID and the - /// PendingFlag set. - VLIWSchedBoundary(unsigned ID, const Twine &Name) - : Available(ID, Name+".A"), - Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P") {} - - ~VLIWSchedBoundary() { - delete ResourceModel; - delete HazardRec; - } - - void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) { - DAG = dag; - SchedModel = smodel; - CurrCycle = 0; - IssueCount = 0; - // Initialize the critical path length limit, which used by the scheduling - // cost model to determine the value for scheduling an instruction. We use - // a slightly different heuristic for small and large functions. For small - // functions, it's important to use the height/depth of the instruction. - // For large functions, prioritizing by height or depth increases spills. - CriticalPathLength = DAG->getBBSize() / SchedModel->getIssueWidth(); - if (DAG->getBBSize() < 50) - // We divide by two as a cheap and simple heuristic to reduce the - // critcal path length, which increases the priority of using the graph - // height/depth in the scheduler's cost computation. - CriticalPathLength >>= 1; - else { - // For large basic blocks, we prefer a larger critical path length to - // decrease the priority of using the graph height/depth. - unsigned MaxPath = 0; - for (auto &SU : DAG->SUnits) - MaxPath = std::max(MaxPath, isTop() ? SU.getHeight() : SU.getDepth()); - CriticalPathLength = std::max(CriticalPathLength, MaxPath) + 1; - } - } - - bool isTop() const { - return Available.getID() == ConvergingVLIWScheduler::TopQID; - } - - bool checkHazard(SUnit *SU); - - void releaseNode(SUnit *SU, unsigned ReadyCycle); - - void bumpCycle(); - - void bumpNode(SUnit *SU); - - void releasePending(); - - void removeReady(SUnit *SU); - - SUnit *pickOnlyChoice(); - - bool isLatencyBound(SUnit *SU) { - if (CurrCycle >= CriticalPathLength) - return true; - unsigned PathLength = isTop() ? SU->getHeight() : SU->getDepth(); - return CriticalPathLength - CurrCycle <= PathLength; - } - }; - - VLIWMachineScheduler *DAG = nullptr; - const TargetSchedModel *SchedModel = nullptr; - - // State of the top and bottom scheduled instruction boundaries. - VLIWSchedBoundary Top; - VLIWSchedBoundary Bot; - - /// List of pressure sets that have a high pressure level in the region. - std::vector HighPressureSets; - -public: - /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) - enum { - TopQID = 1, - BotQID = 2, - LogMaxQID = 2 - }; - - ConvergingVLIWScheduler() : Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} - - void initialize(ScheduleDAGMI *dag) override; - - SUnit *pickNode(bool &IsTopNode) override; - - void schedNode(SUnit *SU, bool IsTopNode) override; - - void releaseTopNode(SUnit *SU) override; - - void releaseBottomNode(SUnit *SU) override; - - unsigned reportPackets() { - return Top.ResourceModel->getTotalPackets() + - Bot.ResourceModel->getTotalPackets(); - } - +class HexagonConvergingVLIWScheduler : public ConvergingVLIWScheduler { protected: - SUnit *pickNodeBidrectional(bool &IsTopNode); - - int pressureChange(const SUnit *SU, bool isBotUp); - - int SchedulingCost(ReadyQueue &Q, - SUnit *SU, SchedCandidate &Candidate, - RegPressureDelta &Delta, bool verbose); - - CandResult pickNodeFromQueue(VLIWSchedBoundary &Zone, - const RegPressureTracker &RPTracker, - SchedCandidate &Candidate); -#ifndef NDEBUG - void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, - int Cost, PressureChange P = PressureChange()); - - void readyQueueVerboseDump(const RegPressureTracker &RPTracker, - SchedCandidate &Candidate, ReadyQueue &Q); -#endif + VLIWResourceModel * + createVLIWResourceModel(const TargetSubtargetInfo &STI, + const TargetSchedModel *SchedModel) const override; + int SchedulingCost(ReadyQueue &Q, SUnit *SU, SchedCandidate &Candidate, + RegPressureDelta &Delta, bool verbose) override; }; } // end namespace llvm Index: llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp =================================================================== --- llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -14,676 +14,44 @@ #include "HexagonMachineScheduler.h" #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/DFAPacketizer.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include -#include +#include "llvm/CodeGen/VLIWMachineScheduler.h" using namespace llvm; #define DEBUG_TYPE "machine-scheduler" -static cl::opt IgnoreBBRegPressure("ignore-bb-reg-pressure", - cl::Hidden, cl::ZeroOrMore, cl::init(false)); - -static cl::opt UseNewerCandidate("use-newer-candidate", - cl::Hidden, cl::ZeroOrMore, cl::init(true)); - -static cl::opt SchedDebugVerboseLevel("misched-verbose-level", - cl::Hidden, cl::ZeroOrMore, cl::init(1)); - -// Check if the scheduler should penalize instructions that are available to -// early due to a zero-latency dependence. -static cl::opt CheckEarlyAvail("check-early-avail", cl::Hidden, - cl::ZeroOrMore, cl::init(true)); - -// This value is used to determine if a register class is a high pressure set. -// We compute the maximum number of registers needed and divided by the total -// available. Then, we compare the result to this value. -static cl::opt RPThreshold("hexagon-reg-pressure", cl::Hidden, - cl::init(0.75f), cl::desc("High register pressure threhold.")); - /// Return true if there is a dependence between SUd and SUu. -static bool hasDependence(const SUnit *SUd, const SUnit *SUu, - const HexagonInstrInfo &QII) { - if (SUd->Succs.size() == 0) - return false; +bool HexagonVLIWResourceModel::hasDependence(const SUnit *SUd, + const SUnit *SUu) { + const auto *QII = static_cast(TII); // Enable .cur formation. - if (QII.mayBeCurLoad(*SUd->getInstr())) + if (QII->mayBeCurLoad(*SUd->getInstr())) return false; - if (QII.canExecuteInBundle(*SUd->getInstr(), *SUu->getInstr())) - return false; - - for (const auto &S : SUd->Succs) { - // Since we do not add pseudos to packets, might as well - // ignore order dependencies. - if (S.isCtrl()) - continue; - - if (S.getSUnit() == SUu && S.getLatency() > 0) - return true; - } - return false; -} - -/// Check if scheduling of this SU is possible -/// in the current packet. -/// It is _not_ precise (statefull), it is more like -/// another heuristic. Many corner cases are figured -/// empirically. -bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) { - if (!SU || !SU->getInstr()) + if (QII->canExecuteInBundle(*SUd->getInstr(), *SUu->getInstr())) return false; - // First see if the pipeline could receive this instruction - // in the current cycle. - switch (SU->getInstr()->getOpcode()) { - default: - if (!ResourcesModel->canReserveResources(*SU->getInstr())) - return false; - break; - case TargetOpcode::EXTRACT_SUBREG: - case TargetOpcode::INSERT_SUBREG: - case TargetOpcode::SUBREG_TO_REG: - case TargetOpcode::REG_SEQUENCE: - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::COPY: - case TargetOpcode::INLINEASM: - case TargetOpcode::INLINEASM_BR: - break; - } - - MachineBasicBlock *MBB = SU->getInstr()->getParent(); - auto &QST = MBB->getParent()->getSubtarget(); - const auto &QII = *QST.getInstrInfo(); - - // Now see if there are no other dependencies to instructions already - // in the packet. - if (IsTop) { - for (unsigned i = 0, e = Packet.size(); i != e; ++i) - if (hasDependence(Packet[i], SU, QII)) - return false; - } else { - for (unsigned i = 0, e = Packet.size(); i != e; ++i) - if (hasDependence(SU, Packet[i], QII)) - return false; - } - return true; -} - -/// Keep track of available resources. -bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) { - bool startNewCycle = false; - // Artificially reset state. - if (!SU) { - ResourcesModel->clearResources(); - Packet.clear(); - TotalPackets++; - return false; - } - // If this SU does not fit in the packet or the packet is now full - // start a new one. - if (!isResourceAvailable(SU, IsTop) || - Packet.size() >= SchedModel->getIssueWidth()) { - ResourcesModel->clearResources(); - Packet.clear(); - TotalPackets++; - startNewCycle = true; - } - - switch (SU->getInstr()->getOpcode()) { - default: - ResourcesModel->reserveResources(*SU->getInstr()); - break; - case TargetOpcode::EXTRACT_SUBREG: - case TargetOpcode::INSERT_SUBREG: - case TargetOpcode::SUBREG_TO_REG: - case TargetOpcode::REG_SEQUENCE: - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::CFI_INSTRUCTION: - case TargetOpcode::EH_LABEL: - case TargetOpcode::COPY: - case TargetOpcode::INLINEASM: - case TargetOpcode::INLINEASM_BR: - break; - } - Packet.push_back(SU); - -#ifndef NDEBUG - LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); - for (unsigned i = 0, e = Packet.size(); i != e; ++i) { - LLVM_DEBUG(dbgs() << "\t[" << i << "] SU("); - LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); - LLVM_DEBUG(Packet[i]->getInstr()->dump()); - } -#endif - - return startNewCycle; + return VLIWResourceModel::hasDependence(SUd, SUu); } -/// schedule - Called back from MachineScheduler::runOnMachineFunction -/// after setting up the current scheduling region. [RegionBegin, RegionEnd) -/// only includes instructions that have DAG nodes, not scheduling boundaries. -void VLIWMachineScheduler::schedule() { - LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW " - << printMBBReference(*BB) << " " << BB->getName() - << " in_func " << BB->getParent()->getName() - << " at loop depth " << MLI->getLoopDepth(BB) << " \n"); - - buildDAGWithRegPressure(); - - Topo.InitDAGTopologicalSorting(); - - // Postprocess the DAG to add platform-specific artificial dependencies. - postprocessDAG(); - - SmallVector TopRoots, BotRoots; - findRootsAndBiasEdges(TopRoots, BotRoots); - - // Initialize the strategy before modifying the DAG. - SchedImpl->initialize(this); - - LLVM_DEBUG(unsigned maxH = 0; - for (unsigned su = 0, e = SUnits.size(); su != e; - ++su) if (SUnits[su].getHeight() > maxH) maxH = - SUnits[su].getHeight(); - dbgs() << "Max Height " << maxH << "\n";); - LLVM_DEBUG(unsigned maxD = 0; - for (unsigned su = 0, e = SUnits.size(); su != e; - ++su) if (SUnits[su].getDepth() > maxD) maxD = - SUnits[su].getDepth(); - dbgs() << "Max Depth " << maxD << "\n";); - LLVM_DEBUG(dump()); - - initQueues(TopRoots, BotRoots); - - bool IsTopNode = false; - while (true) { - LLVM_DEBUG( - dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); - SUnit *SU = SchedImpl->pickNode(IsTopNode); - if (!SU) break; - - if (!checkSchedLimit()) - break; - - scheduleMI(SU, IsTopNode); - - // Notify the scheduling strategy after updating the DAG. - SchedImpl->schedNode(SU, IsTopNode); - - updateQueues(SU, IsTopNode); - } - assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); - - placeDebugValues(); - - LLVM_DEBUG({ - dbgs() << "*** Final schedule for " - << printMBBReference(*begin()->getParent()) << " ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); +VLIWResourceModel *HexagonConvergingVLIWScheduler::createVLIWResourceModel( + const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const { + return new HexagonVLIWResourceModel(STI, SchedModel); } -void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { - DAG = static_cast(dag); - SchedModel = DAG->getSchedModel(); - - Top.init(DAG, SchedModel); - Bot.init(DAG, SchedModel); - - // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or - // are disabled, then these HazardRecs will be disabled. - const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); - const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); - const TargetInstrInfo *TII = STI.getInstrInfo(); - delete Top.HazardRec; - delete Bot.HazardRec; - Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); - Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); - - delete Top.ResourceModel; - delete Bot.ResourceModel; - Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); - Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); - - const std::vector &MaxPressure = - DAG->getRegPressure().MaxSetPressure; - HighPressureSets.assign(MaxPressure.size(), 0); - for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) { - unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i); - HighPressureSets[i] = - ((float) MaxPressure[i] > ((float) Limit * RPThreshold)); - } - - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); -} - -void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { - for (const SDep &PI : SU->Preds) { - unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; - unsigned MinLatency = PI.getLatency(); -#ifndef NDEBUG - Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); -#endif - if (SU->TopReadyCycle < PredReadyCycle + MinLatency) - SU->TopReadyCycle = PredReadyCycle + MinLatency; - } - - if (!SU->isScheduled) - Top.releaseNode(SU, SU->TopReadyCycle); -} - -void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { - assert(SU->getInstr() && "Scheduled SUnit must have instr"); - - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned MinLatency = I->getLatency(); -#ifndef NDEBUG - Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); -#endif - if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) - SU->BotReadyCycle = SuccReadyCycle + MinLatency; - } - - if (!SU->isScheduled) - Bot.releaseNode(SU, SU->BotReadyCycle); -} - -/// Does this SU have a hazard within the current instruction group. -/// -/// The scheduler supports two modes of hazard recognition. The first is the -/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that -/// supports highly complicated in-order reservation tables -/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. -/// -/// The second is a streamlined mechanism that checks for hazards based on -/// simple counters that the scheduler itself maintains. It explicitly checks -/// for instruction dispatch limitations, including the number of micro-ops that -/// can dispatch per cycle. -/// -/// TODO: Also check whether the SU must start a new group. -bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { - if (HazardRec->isEnabled()) - return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; - - unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); - if (IssueCount + uops > SchedModel->getIssueWidth()) - return true; - - return false; -} - -void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU, - unsigned ReadyCycle) { - if (ReadyCycle < MinReadyCycle) - MinReadyCycle = ReadyCycle; - - // Check for interlocks first. For the purpose of other heuristics, an - // instruction that cannot issue appears as if it's not in the ReadyQueue. - if (ReadyCycle > CurrCycle || checkHazard(SU)) - - Pending.push(SU); - else - Available.push(SU); -} - -/// Move the boundary of scheduled code by one cycle. -void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { - unsigned Width = SchedModel->getIssueWidth(); - IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; - - assert(MinReadyCycle < std::numeric_limits::max() && - "MinReadyCycle uninitialized"); - unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); - - if (!HazardRec->isEnabled()) { - // Bypass HazardRec virtual calls. - CurrCycle = NextCycle; - } else { - // Bypass getHazardType calls in case of long latency. - for (; CurrCycle != NextCycle; ++CurrCycle) { - if (isTop()) - HazardRec->AdvanceCycle(); - else - HazardRec->RecedeCycle(); - } - } - CheckPending = true; - - LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " - << CurrCycle << '\n'); -} - -/// Move the boundary of scheduled code by one SUnit. -void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { - bool startNewCycle = false; - - // Update the reservation table. - if (HazardRec->isEnabled()) { - if (!isTop() && SU->isCall) { - // Calls are scheduled with their preceding instructions. For bottom-up - // scheduling, clear the pipeline state before emitting. - HazardRec->Reset(); - } - HazardRec->EmitInstruction(SU); - } - - // Update DFA model. - startNewCycle = ResourceModel->reserveResources(SU, isTop()); - - // Check the instruction group dispatch limit. - // TODO: Check if this SU must end a dispatch group. - IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); - if (startNewCycle) { - LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); - bumpCycle(); - } - else - LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle " - << CurrCycle << '\n'); -} - -/// Release pending ready nodes in to the available queue. This makes them -/// visible to heuristics. -void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { - // If the available queue is empty, it is safe to reset MinReadyCycle. - if (Available.empty()) - MinReadyCycle = std::numeric_limits::max(); - - // Check to see if any of the pending instructions are ready to issue. If - // so, add them to the available queue. - for (unsigned i = 0, e = Pending.size(); i != e; ++i) { - SUnit *SU = *(Pending.begin()+i); - unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; - - if (ReadyCycle < MinReadyCycle) - MinReadyCycle = ReadyCycle; - - if (ReadyCycle > CurrCycle) - continue; - - if (checkHazard(SU)) - continue; - - Available.push(SU); - Pending.remove(Pending.begin()+i); - --i; --e; - } - CheckPending = false; -} - -/// Remove SU from the ready set for this boundary. -void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { - if (Available.isInQueue(SU)) - Available.remove(Available.find(SU)); - else { - assert(Pending.isInQueue(SU) && "bad ready count"); - Pending.remove(Pending.find(SU)); - } -} +int HexagonConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, + SchedCandidate &Candidate, + RegPressureDelta &Delta, + bool verbose) { + int ResCount = + ConvergingVLIWScheduler::SchedulingCost(Q, SU, Candidate, Delta, verbose); -/// If this queue only has one ready candidate, return it. As a side effect, -/// advance the cycle until at least one node is ready. If multiple instructions -/// are ready, return NULL. -SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { - if (CheckPending) - releasePending(); - - auto AdvanceCycle = [this]() { - if (Available.empty()) - return true; - if (Available.size() == 1 && Pending.size() > 0) - return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) || - getWeakLeft(*Available.begin(), isTop()) != 0; - return false; - }; - for (unsigned i = 0; AdvanceCycle(); ++i) { - assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && - "permanent hazard"); (void)i; - ResourceModel->reserveResources(nullptr, isTop()); - bumpCycle(); - releasePending(); - } - if (Available.size() == 1) - return *Available.begin(); - return nullptr; -} - -#ifndef NDEBUG -void ConvergingVLIWScheduler::traceCandidate(const char *Label, - const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) { - dbgs() << Label << " " << Q.getName() << " "; - if (P.isValid()) - dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" - << P.getUnitInc() << " "; - else - dbgs() << " "; - dbgs() << "cost(" << Cost << ")\t"; - DAG->dumpNode(*SU); -} - -// Very detailed queue dump, to be used with higher verbosity levels. -void ConvergingVLIWScheduler::readyQueueVerboseDump( - const RegPressureTracker &RPTracker, SchedCandidate &Candidate, - ReadyQueue &Q) { - RegPressureTracker &TempTracker = const_cast(RPTracker); - - dbgs() << ">>> " << Q.getName() << "\n"; - for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { - RegPressureDelta RPDelta; - TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, - DAG->getRegionCriticalPSets(), - DAG->getRegPressure().MaxSetPressure); - std::stringstream dbgstr; - dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")"; - dbgs() << dbgstr.str(); - SchedulingCost(Q, *I, Candidate, RPDelta, true); - dbgs() << "\t"; - (*I)->getInstr()->dump(); - } - dbgs() << "\n"; -} -#endif - -/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor -/// of SU, return true (we may have duplicates) -static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { - if (SU->NumPredsLeft == 0) - return false; - - for (auto &Pred : SU->Preds) { - // We found an available, but not scheduled, predecessor. - if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) - return false; - } - - return true; -} - -/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor -/// of SU, return true (we may have duplicates) -static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { - if (SU->NumSuccsLeft == 0) - return false; - - for (auto &Succ : SU->Succs) { - // We found an available, but not scheduled, successor. - if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) - return false; - } - return true; -} - -/// Check if the instruction changes the register pressure of a register in the -/// high pressure set. The function returns a negative value if the pressure -/// decreases and a positive value is the pressure increases. If the instruction -/// doesn't use a high pressure register or doesn't change the register -/// pressure, then return 0. -int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { - PressureDiff &PD = DAG->getPressureDiff(SU); - for (auto &P : PD) { - if (!P.isValid()) - continue; - // The pressure differences are computed bottom-up, so the comparision for - // an increase is positive in the bottom direction, but negative in the - // top-down direction. - if (HighPressureSets[P.getPSet()]) - return (isBotUp ? P.getUnitInc() : -P.getUnitInc()); - } - return 0; -} - -// Constants used to denote relative importance of -// heuristic components for cost computation. -static const unsigned PriorityOne = 200; -static const unsigned PriorityTwo = 50; -static const unsigned PriorityThree = 75; -static const unsigned ScaleTwo = 10; - -/// Single point to compute overall scheduling cost. -/// TODO: More heuristics will be used soon. -int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, - SchedCandidate &Candidate, - RegPressureDelta &Delta, - bool verbose) { - // Initial trivial priority. - int ResCount = 1; - - // Do not waste time on a node that is already scheduled. if (!SU || SU->isScheduled) return ResCount; - LLVM_DEBUG(if (verbose) dbgs() - << ((Q.getID() == TopQID) ? "(top|" : "(bot|")); - // Forced priority is high. - if (SU->isScheduleHigh) { - ResCount += PriorityOne; - LLVM_DEBUG(dbgs() << "H|"); - } - - unsigned IsAvailableAmt = 0; - // Critical path first. - if (Q.getID() == TopQID) { - if (Top.isLatencyBound(SU)) { - LLVM_DEBUG(if (verbose) dbgs() << "LB|"); - ResCount += (SU->getHeight() * ScaleTwo); - } - - LLVM_DEBUG(if (verbose) { - std::stringstream dbgstr; - dbgstr << "h" << std::setw(3) << SU->getHeight() << "|"; - dbgs() << dbgstr.str(); - }); - - // If resources are available for it, multiply the - // chance of scheduling. - if (Top.ResourceModel->isResourceAvailable(SU, true)) { - IsAvailableAmt = (PriorityTwo + PriorityThree); - ResCount += IsAvailableAmt; - LLVM_DEBUG(if (verbose) dbgs() << "A|"); - } else - LLVM_DEBUG(if (verbose) dbgs() << " |"); - } else { - if (Bot.isLatencyBound(SU)) { - LLVM_DEBUG(if (verbose) dbgs() << "LB|"); - ResCount += (SU->getDepth() * ScaleTwo); - } - - LLVM_DEBUG(if (verbose) { - std::stringstream dbgstr; - dbgstr << "d" << std::setw(3) << SU->getDepth() << "|"; - dbgs() << dbgstr.str(); - }); - - // If resources are available for it, multiply the - // chance of scheduling. - if (Bot.ResourceModel->isResourceAvailable(SU, false)) { - IsAvailableAmt = (PriorityTwo + PriorityThree); - ResCount += IsAvailableAmt; - LLVM_DEBUG(if (verbose) dbgs() << "A|"); - } else - LLVM_DEBUG(if (verbose) dbgs() << " |"); - } - - unsigned NumNodesBlocking = 0; - if (Q.getID() == TopQID) { - // How many SUs does it block from scheduling? - // Look at all of the successors of this node. - // Count the number of nodes that - // this node is the sole unscheduled node for. - if (Top.isLatencyBound(SU)) - for (const SDep &SI : SU->Succs) - if (isSingleUnscheduledPred(SI.getSUnit(), SU)) - ++NumNodesBlocking; - } else { - // How many unscheduled predecessors block this node? - if (Bot.isLatencyBound(SU)) - for (const SDep &PI : SU->Preds) - if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) - ++NumNodesBlocking; - } - ResCount += (NumNodesBlocking * ScaleTwo); - - LLVM_DEBUG(if (verbose) { - std::stringstream dbgstr; - dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|"; - dbgs() << dbgstr.str(); - }); - - // Factor in reg pressure as a heuristic. - if (!IgnoreBBRegPressure) { - // Decrease priority by the amount that register pressure exceeds the limit. - ResCount -= (Delta.Excess.getUnitInc()*PriorityOne); - // Decrease priority if register pressure exceeds the limit. - ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne); - // Decrease priority slightly if register pressure would increase over the - // current maximum. - ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo); - // If there are register pressure issues, then we remove the value added for - // the instruction being available. The rationale is that we really don't - // want to schedule an instruction that causes a spill. - if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 && - (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() || - Delta.CurrentMax.getUnitInc())) - ResCount -= IsAvailableAmt; - LLVM_DEBUG(if (verbose) { - dbgs() << "RP " << Delta.Excess.getUnitInc() << "/" - << Delta.CriticalMax.getUnitInc() << "/" - << Delta.CurrentMax.getUnitInc() << ")|"; - }); - } - - // Give a little extra priority to a .cur instruction if there is a resource - // available for it. auto &QST = DAG->MF.getSubtarget(); auto &QII = *QST.getInstrInfo(); if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) { @@ -698,303 +66,5 @@ } } - // Give preference to a zero latency instruction if the dependent - // instruction is in the current packet. - if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) { - for (const SDep &PI : SU->Preds) { - if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() && - PI.getLatency() == 0 && - Top.ResourceModel->isInPacket(PI.getSUnit())) { - ResCount += PriorityThree; - LLVM_DEBUG(if (verbose) dbgs() << "Z|"); - } - } - } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) { - for (const SDep &SI : SU->Succs) { - if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() && - SI.getLatency() == 0 && - Bot.ResourceModel->isInPacket(SI.getSUnit())) { - ResCount += PriorityThree; - LLVM_DEBUG(if (verbose) dbgs() << "Z|"); - } - } - } - - // If the instruction has a non-zero latency dependence with an instruction in - // the current packet, then it should not be scheduled yet. The case occurs - // when the dependent instruction is scheduled in a new packet, so the - // scheduler updates the current cycle and pending instructions become - // available. - if (CheckEarlyAvail) { - if (Q.getID() == TopQID) { - for (const auto &PI : SU->Preds) { - if (PI.getLatency() > 0 && - Top.ResourceModel->isInPacket(PI.getSUnit())) { - ResCount -= PriorityOne; - LLVM_DEBUG(if (verbose) dbgs() << "D|"); - } - } - } else { - for (const auto &SI : SU->Succs) { - if (SI.getLatency() > 0 && - Bot.ResourceModel->isInPacket(SI.getSUnit())) { - ResCount -= PriorityOne; - LLVM_DEBUG(if (verbose) dbgs() << "D|"); - } - } - } - } - - LLVM_DEBUG(if (verbose) { - std::stringstream dbgstr; - dbgstr << "Total " << std::setw(4) << ResCount << ")"; - dbgs() << dbgstr.str(); - }); - return ResCount; } - -/// Pick the best candidate from the top queue. -/// -/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during -/// DAG building. To adjust for the current scheduling location we need to -/// maintain the number of vreg uses remaining to be top-scheduled. -ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: -pickNodeFromQueue(VLIWSchedBoundary &Zone, const RegPressureTracker &RPTracker, - SchedCandidate &Candidate) { - ReadyQueue &Q = Zone.Available; - LLVM_DEBUG(if (SchedDebugVerboseLevel > 1) - readyQueueVerboseDump(RPTracker, Candidate, Q); - else Q.dump();); - - // getMaxPressureDelta temporarily modifies the tracker. - RegPressureTracker &TempTracker = const_cast(RPTracker); - - // BestSU remains NULL if no top candidates beat the best existing candidate. - CandResult FoundCandidate = NoCand; - for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { - RegPressureDelta RPDelta; - TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, - DAG->getRegionCriticalPSets(), - DAG->getRegPressure().MaxSetPressure); - - int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); - - // Initialize the candidate if needed. - if (!Candidate.SU) { - LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - Candidate.SCost = CurrentCost; - FoundCandidate = NodeOrder; - continue; - } - - // Choose node order for negative cost candidates. There is no good - // candidate in this case. - if (CurrentCost < 0 && Candidate.SCost < 0) { - if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) - || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { - LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - Candidate.SCost = CurrentCost; - FoundCandidate = NodeOrder; - } - continue; - } - - // Best cost. - if (CurrentCost > Candidate.SCost) { - LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - Candidate.SCost = CurrentCost; - FoundCandidate = BestCost; - continue; - } - - // Choose an instruction that does not depend on an artificial edge. - unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID)); - unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID)); - if (CurrWeak != CandWeak) { - if (CurrWeak < CandWeak) { - LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - Candidate.SCost = CurrentCost; - FoundCandidate = Weak; - } - continue; - } - - if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) { - unsigned CurrSize, CandSize; - if (Q.getID() == TopQID) { - CurrSize = (*I)->Succs.size(); - CandSize = Candidate.SU->Succs.size(); - } else { - CurrSize = (*I)->Preds.size(); - CandSize = Candidate.SU->Preds.size(); - } - if (CurrSize > CandSize) { - LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - Candidate.SCost = CurrentCost; - FoundCandidate = BestCost; - } - // Keep the old candidate if it's a better candidate. That is, don't use - // the subsequent tie breaker. - if (CurrSize != CandSize) - continue; - } - - // Tie breaker. - // To avoid scheduling indeterminism, we need a tie breaker - // for the case when cost is identical for two nodes. - if (UseNewerCandidate && CurrentCost == Candidate.SCost) { - if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) - || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { - LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - Candidate.SCost = CurrentCost; - FoundCandidate = NodeOrder; - continue; - } - } - - // Fall through to original instruction order. - // Only consider node order if Candidate was chosen from this Q. - if (FoundCandidate == NoCand) - continue; - } - return FoundCandidate; -} - -/// Pick the best candidate node from either the top or bottom queue. -SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { - // Schedule as far as possible in the direction of no choice. This is most - // efficient, but also provides the best heuristics for CriticalPSets. - if (SUnit *SU = Bot.pickOnlyChoice()) { - LLVM_DEBUG(dbgs() << "Picked only Bottom\n"); - IsTopNode = false; - return SU; - } - if (SUnit *SU = Top.pickOnlyChoice()) { - LLVM_DEBUG(dbgs() << "Picked only Top\n"); - IsTopNode = true; - return SU; - } - SchedCandidate BotCand; - // Prefer bottom scheduling when heuristics are silent. - CandResult BotResult = pickNodeFromQueue(Bot, - DAG->getBotRPTracker(), BotCand); - assert(BotResult != NoCand && "failed to find the first candidate"); - - // If either Q has a single candidate that provides the least increase in - // Excess pressure, we can immediately schedule from that Q. - // - // RegionCriticalPSets summarizes the pressure within the scheduled region and - // affects picking from either Q. If scheduling in one direction must - // increase pressure for one of the excess PSets, then schedule in that - // direction first to provide more freedom in the other direction. - if (BotResult == SingleExcess || BotResult == SingleCritical) { - LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n"); - IsTopNode = false; - return BotCand.SU; - } - // Check if the top Q has a better candidate. - SchedCandidate TopCand; - CandResult TopResult = pickNodeFromQueue(Top, - DAG->getTopRPTracker(), TopCand); - assert(TopResult != NoCand && "failed to find the first candidate"); - - if (TopResult == SingleExcess || TopResult == SingleCritical) { - LLVM_DEBUG(dbgs() << "Prefered Top Node\n"); - IsTopNode = true; - return TopCand.SU; - } - // If either Q has a single candidate that minimizes pressure above the - // original region's pressure pick it. - if (BotResult == SingleMax) { - LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n"); - IsTopNode = false; - return BotCand.SU; - } - if (TopResult == SingleMax) { - LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n"); - IsTopNode = true; - return TopCand.SU; - } - if (TopCand.SCost > BotCand.SCost) { - LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n"); - IsTopNode = true; - return TopCand.SU; - } - // Otherwise prefer the bottom candidate in node order. - LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n"); - IsTopNode = false; - return BotCand.SU; -} - -/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. -SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { - if (DAG->top() == DAG->bottom()) { - assert(Top.Available.empty() && Top.Pending.empty() && - Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); - return nullptr; - } - SUnit *SU; - if (ForceTopDown) { - SU = Top.pickOnlyChoice(); - if (!SU) { - SchedCandidate TopCand; - CandResult TopResult = - pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); - assert(TopResult != NoCand && "failed to find the first candidate"); - (void)TopResult; - SU = TopCand.SU; - } - IsTopNode = true; - } else if (ForceBottomUp) { - SU = Bot.pickOnlyChoice(); - if (!SU) { - SchedCandidate BotCand; - CandResult BotResult = - pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); - assert(BotResult != NoCand && "failed to find the first candidate"); - (void)BotResult; - SU = BotCand.SU; - } - IsTopNode = false; - } else { - SU = pickNodeBidrectional(IsTopNode); - } - if (SU->isTopReady()) - Top.removeReady(SU); - if (SU->isBottomReady()) - Bot.removeReady(SU); - - LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") - << " Scheduling instruction in cycle " - << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " (" - << reportPackets() << ")\n"; - DAG->dumpNode(*SU)); - return SU; -} - -/// Update the scheduler's state after scheduling a node. This is the same node -/// that was just returned by pickNode(). However, VLIWMachineScheduler needs -/// to update it's state based on the current cycle before MachineSchedStrategy -/// does. -void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { - if (IsTopNode) { - Top.bumpNode(SU); - SU->TopReadyCycle = Top.CurrCycle; - } else { - Bot.bumpNode(SU); - SU->BotReadyCycle = Bot.CurrCycle; - } -} Index: llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp =================================================================== --- llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -21,6 +21,7 @@ #include "TargetInfo/HexagonTargetInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/VLIWMachineScheduler.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/MC/TargetRegistry.h" @@ -120,8 +121,8 @@ int HexagonTargetMachineModule = 0; static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { - ScheduleDAGMILive *DAG = - new VLIWMachineScheduler(C, std::make_unique()); + ScheduleDAGMILive *DAG = new VLIWMachineScheduler( + C, std::make_unique()); DAG->addMutation(std::make_unique()); DAG->addMutation(std::make_unique()); DAG->addMutation(std::make_unique());