Index: include/llvm/CodeGen/MachineScheduler.h =================================================================== --- include/llvm/CodeGen/MachineScheduler.h +++ include/llvm/CodeGen/MachineScheduler.h @@ -217,6 +217,9 @@ /// Initialize the strategy after building the DAG for a new region. virtual void initialize(ScheduleDAGMI *DAG) = 0; + /// Tell the strategy that the current MBB is done. + virtual void leaveMBB(MachineBasicBlock *MBB) {} + /// Notify this strategy that all roots have been released (including those /// that depend on EntrySU or ExitSU). virtual void registerRoots() {} @@ -326,6 +329,8 @@ /// reorderable instructions. void schedule() override; + void finishBlock() override; + /// Change the position of an instruction within the basic block and update /// live ranges and region boundary iterators. void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); Index: include/llvm/Target/TargetSubtargetInfo.h =================================================================== --- include/llvm/Target/TargetSubtargetInfo.h +++ include/llvm/Target/TargetSubtargetInfo.h @@ -163,6 +163,11 @@ /// which is the preferred way to influence this. virtual bool enablePostRAScheduler() const; + /// If this method returns true, handling of the scheduling regions + /// themselves (in case of a scheduling boundary in MBB) will be done + /// beginning with the topmost region of MBB. + virtual bool doMBBSchedRegionsTopDown() const { return false; } + /// \brief True if the subtarget should run the atomic expansion pass. virtual bool enableAtomicExpand() const; Index: lib/CodeGen/MachineScheduler.cpp =================================================================== --- lib/CodeGen/MachineScheduler.cpp +++ lib/CodeGen/MachineScheduler.cpp @@ -405,6 +405,7 @@ // Initialize the context of the pass. 
MF = &mf; + MLI = &getAnalysis<MachineLoopInfo>(); PassConfig = &getAnalysis<TargetPassConfig>(); if (VerifyScheduling) @@ -437,11 +438,59 @@ return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF); } +// A region of an MBB for scheduling. +struct SchedRegion { + MachineBasicBlock::iterator RegionBegin; + MachineBasicBlock::iterator RegionEnd; + unsigned NumRegionInstrs; + SchedRegion(MachineBasicBlock::iterator b, MachineBasicBlock::iterator e, + unsigned n) : + RegionBegin(b), RegionEnd(e), NumRegionInstrs(n) {} +}; + +static void +getSchedRegions(MachineBasicBlock *MBB, + std::vector<SchedRegion> &Regions) { + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + + MachineBasicBlock::iterator I = nullptr; + for (MachineBasicBlock::iterator RegionEnd = MBB->end(); + RegionEnd != MBB->begin(); RegionEnd = I) { + + // Avoid decrementing RegionEnd for blocks with no terminator. + if (RegionEnd != MBB->end() || + isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { + --RegionEnd; + } + + // The next region starts above the previous region. Look backward in the + // instruction stream until we find the nearest boundary. + unsigned NumRegionInstrs = 0; + I = RegionEnd; + for (; I != MBB->begin(); --I) { + MachineInstr &MI = *std::prev(I); + if (isSchedBoundary(&MI, &*MBB, MF, TII)) + break; + if (!MI.isDebugValue()) + ++NumRegionInstrs; + } + + Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs)); + } + + // TODO: + // * Should IsPostRA be passed here (currently SystemZ only does + // post-ra scheduling, but that's hopefully changed soon, and don't know if + // this will be done pre-ra)? + // * Should this be done more efficiently (copying objects)? + if (MF->getSubtarget().doMBBSchedRegionsTopDown()) + std::reverse(Regions.begin(), Regions.end()); +} + /// Main driver for both MachineScheduler and PostMachineScheduler. 
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags) { - const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - // Visit all machine basic blocks. // // TODO: Visit blocks in global postorder or postorder within the bottom-up @@ -472,26 +521,15 @@ // // MBB::size() uses instr_iterator to count. Here we need a bundle to count // as a single instruction. - for(MachineBasicBlock::iterator RegionEnd = MBB->end(); - RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) { - // Avoid decrementing RegionEnd for blocks with no terminator. - if (RegionEnd != MBB->end() || - isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { - --RegionEnd; - } + std::vector<SchedRegion> MBBRegions; + getSchedRegions(&*MBB, MBBRegions); + for (std::vector<SchedRegion>::iterator R = MBBRegions.begin(); + R != MBBRegions.end(); ++R) { + MachineBasicBlock::iterator I = R->RegionBegin; + MachineBasicBlock::iterator RegionEnd = R->RegionEnd; + unsigned NumRegionInstrs = R->NumRegionInstrs; - // The next region starts above the previous region. Look backward in the - // instruction stream until we find the nearest boundary. - unsigned NumRegionInstrs = 0; - MachineBasicBlock::iterator I = RegionEnd; - for (; I != MBB->begin(); --I) { - MachineInstr &MI = *std::prev(I); - if (isSchedBoundary(&MI, &*MBB, MF, TII)) - break; - if (!MI.isDebugValue()) - ++NumRegionInstrs; - } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs); @@ -517,15 +555,11 @@ } // Schedule a region: possibly reorder instructions. - // This invalidates 'RegionEnd' and 'I'. + // This invalidates the original region iterators. Scheduler.schedule(); // Close the current region. Scheduler.exitRegion(); - - // Scheduling has invalidated the current iterator 'I'. Ask the - // scheduler for the top of it's scheduled region. 
- RegionEnd = Scheduler.begin(); } Scheduler.finishBlock(); // FIXME: Ideally, no further passes should rely on kill flags. However, @@ -654,6 +688,10 @@ } } +void ScheduleDAGMI::finishBlock() { + SchedImpl->leaveMBB(BB); +} + /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after /// crossing a scheduling boundary. [begin, end) includes all instructions in /// the region, including the boundary itself and single-instruction regions Index: lib/Target/SystemZ/SystemZHazardRecognizer.h =================================================================== --- lib/Target/SystemZ/SystemZHazardRecognizer.h +++ lib/Target/SystemZ/SystemZHazardRecognizer.h @@ -19,6 +19,13 @@ // * Processor resources usage. It is beneficial to balance the use of // resources. // +// A goal is to consider all instructions, also those outside of any +// scheduling region. Such instructions are "advanced" past and include +// single instructions before a scheduling region, branches etc. +// +// A block that has only one predecessor continues scheduling with the state +// of it (which may be updated by emitting branches). +// // ===---------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H @@ -35,10 +42,10 @@ namespace llvm { -/// SystemZHazardRecognizer maintains the state during scheduling. +/// SystemZHazardRecognizer maintains the state for one MBB during scheduling. class SystemZHazardRecognizer : public ScheduleHazardRecognizer { - ScheduleDAGMI *DAG; + const SystemZInstrInfo *TII; const TargetSchedModel *SchedModel; /// Keep track of the number of decoder slots used in the current @@ -88,18 +95,28 @@ /// ops, return true if it seems good to schedule an FPd op next. bool isFPdOpPreferred_distance(const SUnit *SU); + /// Wrap a non-scheduled instruction in an SU and emit it. + void emitInstruction(MachineInstr *MI); + + /// Last emitted instruction or nullptr. 
+ MachineInstr *LastEmittedMI; + public: - SystemZHazardRecognizer(const MachineSchedContext *C); + SystemZHazardRecognizer(const SystemZInstrInfo *tii, + const TargetSchedModel *SM) + : TII(tii), SchedModel(SM) { Reset(); } - void setDAG(ScheduleDAGMI *dag) { - DAG = dag; - SchedModel = dag->getSchedModel(); - } - HazardType getHazardType(SUnit *m, int Stalls = 0) override; void Reset() override; void EmitInstruction(SUnit *SU) override; + /// Resolves and cache a resolved scheduling class for an SUnit. + const MCSchedClassDesc *getSchedClass(SUnit *SU) const { + if (!SU->SchedClass && SchedModel->hasInstrSchedModel()) + SU->SchedClass = SchedModel->resolveSchedClass(SU->getInstr()); + return SU->SchedClass; + } + // Cost functions used by SystemZPostRASchedStrategy while // evaluating candidates. @@ -121,6 +138,20 @@ void dumpCurrGroup(std::string Msg = "") const; void dumpProcResourceCounters() const; #endif + + MachineBasicBlock::iterator getLastEmittedMI() { return LastEmittedMI; } + + /// Copy counters from end of single predecessor. + void copyCounters(SystemZHazardRecognizer *Incoming); + + /// Update the scheduler state by emitting (non-scheduled) instructions + /// from I to NextBegin. + void advance(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator NextBegin); + + /// Emit a branch in a predecessor, and return true if it is a taken branch + /// to (current) MBB. + bool emitIncomingBranch(MachineInstr *MI, MachineBasicBlock *MBB); }; } // namespace llvm Index: lib/Target/SystemZ/SystemZHazardRecognizer.cpp =================================================================== --- lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -19,6 +19,13 @@ // * Processor resources usage. It is beneficial to balance the use of // resources. // +// A goal is to consider all instructions, also those outside of any +// scheduling region. 
Such instructions are "advanced" past and include +// single instructions before a scheduling region, branches etc. +// +// A block that has only one predecessor continues scheduling with the state +// of it (which may be updated by emitting branches). +// // ===---------------------------------------------------------------------===// #include "SystemZHazardRecognizer.h" @@ -36,13 +43,9 @@ "resources during scheduling."), cl::init(8)); -SystemZHazardRecognizer:: -SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr), - SchedModel(nullptr) {} - unsigned SystemZHazardRecognizer:: getNumDecoderSlots(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. @@ -73,12 +76,13 @@ clearProcResCounters(); GrpCount = 0; LastFPdOpCycleIdx = UINT_MAX; + LastEmittedMI = nullptr; DEBUG(CurGroupDbg = "";); } bool SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return true; @@ -125,9 +129,9 @@ #ifndef NDEBUG // Debug output void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { OS << "SU(" << SU->NodeNum << "):"; - OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode()); + OS << TII->getName(SU->getInstr()->getOpcode()); - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return; @@ -203,7 +207,7 @@ // Update state with SU as the next scheduled unit. 
void SystemZHazardRecognizer:: EmitInstruction(SUnit *SU) { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); DEBUG( dumpCurrGroup("Decode group before emission");); // If scheduling an SU that must begin a new decoder group, move on @@ -218,8 +222,10 @@ cgd << ", "; dumpSU(SU, cgd);); + LastEmittedMI = SU->getInstr(); + // After returning from a call, we don't know much about the state. - if (SU->getInstr()->isCall()) { + if (SU->isCall) { DEBUG (dbgs() << "+++ Clearing state after call.\n";); clearProcResCounters(); LastFPdOpCycleIdx = UINT_MAX; @@ -271,7 +277,7 @@ } int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; @@ -315,7 +321,7 @@ resourcesCost(SUnit *SU) { int Cost = 0; - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; @@ -335,3 +341,82 @@ return Cost; } +void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI) { + // Make a temporary SUnit. + SUnit SU(MI, 0); + + // Set interesting flags. 
+ SU.isCall = MI->isCall(); + + const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); + for (const MCWriteProcResEntry &PRE : + make_range(SchedModel->getWriteProcResBegin(SC), + SchedModel->getWriteProcResEnd(SC))) { + switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { + case 0: + SU.hasReservedResource = true; + break; + case 1: + SU.isUnbuffered = true; + break; + default: + break; + } + } + + EmitInstruction(&SU); +} + +void SystemZHazardRecognizer:: +copyCounters(SystemZHazardRecognizer *Incoming) { + // Current decoder group + CurrGroupSize = Incoming->CurrGroupSize; + DEBUG (CurGroupDbg = Incoming->CurGroupDbg;); + + // Processor resources + ProcResourceCounters = Incoming->ProcResourceCounters; + CriticalResourceIdx = Incoming->CriticalResourceIdx; + + // FPd + LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; + GrpCount = Incoming->GrpCount; +} + +void SystemZHazardRecognizer:: +advance(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator NextBegin) { + for (; I != NextBegin; ++I) { + if (I->isPosition() || I->isDebugValue()) + continue; + emitInstruction(&*I); + } +} + +bool SystemZHazardRecognizer::emitIncomingBranch(MachineInstr *MI, + MachineBasicBlock *MBB) { + DEBUG (dbgs() << "+++ Emitting incoming branch: "; MI->dump();); + + emitInstruction(MI); + + // Be optimistic and assume that branch prediction will generally do "the + // right thing". + + if (MI->isBranch() && + (TII->getBranchInfo(*MI).Target->isReg() || // Relative branch + TII->getBranchInfo(*MI).Target->getMBB() == MBB)) { + // Taken branch from predecessor + if (CurrGroupSize > 0) + nextGroup(false /*DbgOutput*/); + return true; + } + + assert ((MI->isBranch() || MI->isReturn() || + MI->getOpcode() == SystemZ::CondTrap) && + "Scheduler: expected a branch or conditional return/trap"); + + // NT branches end group after first decoder slot. 
+ if (CurrGroupSize == 2) + nextGroup(false /*DbgOutput*/); + + return false; +} Index: lib/Target/SystemZ/SystemZMachineScheduler.h =================================================================== --- lib/Target/SystemZ/SystemZMachineScheduler.h +++ lib/Target/SystemZ/SystemZMachineScheduler.h @@ -11,7 +11,8 @@ // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into // the MachineScheduler. It has a sorted Available set of SUs and a pickNode() // implementation that looks to optimize decoder grouping and balance the -// usage of processor resources. +// usage of processor resources. Scheduler states are saved for the end +// region of each MBB, so that a successor block can learn from it. //===----------------------------------------------------------------------===// #include "SystemZHazardRecognizer.h" @@ -28,7 +29,14 @@ /// A MachineSchedStrategy implementation for SystemZ post RA scheduling. class SystemZPostRASchedStrategy : public MachineSchedStrategy { - ScheduleDAGMI *DAG; + + const MachineLoopInfo *MLI; + const SystemZInstrInfo *TII; + + // A SchedModel is needed before any DAG is built while advancing past + // non-scheduled instructions, so it would not always be possible to call + // DAG->getSchedClass(SU). + TargetSchedModel SchedModel; /// A candidate during instruction evaluation. struct Candidate { @@ -79,12 +87,39 @@ /// The set of available SUs to schedule next. SUSet Available; - // HazardRecognizer that tracks the scheduler state for the current - // region. - SystemZHazardRecognizer HazardRec; - + /// Current MBB + MachineBasicBlock *MBB; + + /// Start of the current region. + MachineBasicBlock::iterator CurrBegin; + + /// Maintain hazard recognizers for all blocks, so that the scheduler state + /// can be maintained past BB boundaries when appropriate. + typedef std::map<MachineBasicBlock*, SystemZHazardRecognizer*> MBB2HazRec; + MBB2HazRec SchedStates; + + /// Pointer to the HazardRecognizer that tracks the scheduler state for + /// the current region. 
+ SystemZHazardRecognizer *HazardRec; + + /// A temporary HazardRecognizer used for regions that are separated (by a + /// call) from the bottom-most region of the MBB. + SystemZHazardRecognizer *TmpHazRec; + + /// Create a HazardRec for each MBB and save it in SchedStates, and set + /// HazardRec to point to it. + void setupHazardRecForScheduling(); + + void transferStateFromPred(); + public: SystemZPostRASchedStrategy(const MachineSchedContext *C); + virtual ~SystemZPostRASchedStrategy(); + + /// Called for a region before scheduling. + void initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) override; /// PostRA scheduling does not track pressure. bool shouldTrackPressure() const override { return false; } @@ -92,6 +127,9 @@ /// Initialize the strategy after building the DAG for a new region. void initialize(ScheduleDAGMI *dag) override; + /// Tell the strategy that current MBB is done. + void leaveMBB(MachineBasicBlock *DoneMB) override; + /// Pick the next node to schedule, or return NULL. SUnit *pickNode(bool &IsTopNode) override; Index: lib/Target/SystemZ/SystemZMachineScheduler.cpp =================================================================== --- lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -11,7 +11,8 @@ // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into // the MachineScheduler. It has a sorted Available set of SUs and a pickNode() // implementation that looks to optimize decoder grouping and balance the -// usage of processor resources. +// usage of processor resources. Scheduler states are saved for the end +// region of each MBB, so that a successor block can learn from it. 
//===----------------------------------------------------------------------===// #include "SystemZMachineScheduler.h" @@ -34,14 +35,126 @@ } #endif +// Try to find a single predecessor that would be interesting for the +// scheduler in the top-most region of MBB. +static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB, + const MachineLoop *Loop) { + MachineBasicBlock *PredMBB = nullptr; + if (MBB->pred_size() == 1) + PredMBB = *MBB->pred_begin(); + + // The loop header has two predecessors, return the latch, but not for a + // single block loop. + if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) { + for (auto I = MBB->pred_begin(); I != MBB->pred_end(); ++I) + if (Loop->contains(*I)) + PredMBB = (*I == MBB ? nullptr : *I); + } + + assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB)) + && "Loop MBB should not consider predecessor outside of loop."); + + return PredMBB; +} + +void SystemZPostRASchedStrategy::setupHazardRecForScheduling() { + // Since the MBB regions are traversed top-down, we simply create a new + // HazardRecognizer the first time (topmost), and then reuse it next time. 
+ if (SchedStates.find(MBB) == SchedStates.end()) { + DEBUG (dbgs() << "+++ Entering MBB#" << MBB->getNumber()); + SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel); + } else + DEBUG (dbgs() << "+++ Continuing in MBB#" << MBB->getNumber()); + + HazardRec = SchedStates[MBB]; + + DEBUG (const MachineLoop *Loop = MLI->getLoopFor(MBB); + if (Loop && Loop->getHeader() == MBB) + dbgs() << " (Loop header)"; + dbgs() << ":\n";); +} + +void SystemZPostRASchedStrategy::transferStateFromPred() { + MachineBasicBlock *SinglePredMBB = + getSingleSchedPred(MBB, MLI->getLoopFor(MBB)); + if (SinglePredMBB == nullptr || + SchedStates.find(SinglePredMBB) == SchedStates.end()) + return; + + DEBUG (dbgs() << "+++ Continued scheduling from MBB#" + << SinglePredMBB->getNumber() << "\n";); + + SchedStates[MBB]->copyCounters(SchedStates[SinglePredMBB]); + + // Emit incoming terminator(s). + for (MachineBasicBlock::iterator I = SinglePredMBB->getFirstTerminator(); + I != SinglePredMBB->end(); ++I) + if (SchedStates[MBB]->emitIncomingBranch(&*I, MBB)) + break; +} + +void SystemZPostRASchedStrategy::leaveMBB(MachineBasicBlock *DoneMBB) { + MBB = DoneMBB; // (MBB may not have been set in case of an empty MBB) + DEBUG (dbgs() << "+++ Leaving MBB#" << MBB->getNumber() << "\n";); + + MachineBasicBlock::iterator I; + if (SchedStates.find(MBB) == SchedStates.end()) { + // No scheduling done. Take state from predecessor if possible and then + // emit all instructions. + SchedStates[MBB] = + new SystemZHazardRecognizer(TII, &SchedModel); + transferStateFromPred(); + I = MBB->begin(); + } else + // If scheduling was done, emit everything after the region. + I = std::next(SchedStates[MBB]->getLastEmittedMI()); + + // Advance to first terminator. The successor block will handle the + // terminators depending on CFG layout (T/NT branch etc). 
+ SchedStates[MBB]->advance(I, MBB->getFirstTerminator()); +} + SystemZPostRASchedStrategy:: SystemZPostRASchedStrategy(const MachineSchedContext *C) - : DAG(nullptr), HazardRec(C) {} + : MLI(C->MLI), + TII(static_cast<const SystemZInstrInfo *> + (C->MF->getSubtarget().getInstrInfo())), + MBB(nullptr), CurrBegin(nullptr), HazardRec(nullptr) { + const TargetSubtargetInfo *ST = &C->MF->getSubtarget(); + SchedModel.init(ST->getSchedModel(), ST, TII); + TmpHazRec = new SystemZHazardRecognizer(TII, &SchedModel); +} + +SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() { + // Delete hazard recognizers kept around for each MBB. + for (auto &I : SchedStates) { + SystemZHazardRecognizer *hazrec = I.second; + delete hazrec; + } + delete TmpHazRec; +} + +void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + MBB = Begin->getParent(); + CurrBegin = Begin; +} void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) { - DAG = dag; - HazardRec.setDAG(dag); - HazardRec.Reset(); + setupHazardRecForScheduling(); + + MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI(); + MachineBasicBlock::iterator PreRegBegin = + (LastEmittedMI != nullptr ? std::next(LastEmittedMI) : MBB->begin()); + + // If this is top-most in MBB, try to take over the state from a single + // predecessor, if it has been scheduled. + if (PreRegBegin == MBB->begin()) + transferStateFromPred(); + + // Emit any instructions before start of region. + HazardRec->advance(PreRegBegin, CurrBegin); } // Pick the next node to schedule. @@ -55,25 +168,25 @@ // If only one choice, return it. if (Available.size() == 1) { DEBUG (dbgs() << "+++ Only one: "; - HazardRec.dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";); + HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";); return *Available.begin(); } // All nodes that are possible to schedule are stored by in the // Available set. 
- DEBUG(dbgs() << "+++ Available: "; Available.dump(HazardRec);); + DEBUG(dbgs() << "+++ Available: "; Available.dump(*HazardRec);); Candidate Best; for (auto *SU : Available) { // SU is the next candidate to be compared against current Best. - Candidate c(SU, HazardRec); + Candidate c(SU, *HazardRec); // Remeber which SU is the best candidate. if (Best.SU == nullptr || c < Best) { Best = c; DEBUG(dbgs() << "+++ Best sofar: "; - HazardRec.dumpSU(Best.SU, dbgs()); + HazardRec->dumpSU(Best.SU, dbgs()); if (Best.GroupingCost != 0) dbgs() << "\tGrouping cost:" << Best.GroupingCost; if (Best.ResourcesCost != 0) @@ -138,13 +251,13 @@ // Remove SU from Available set and update HazardRec. Available.erase(SU); - HazardRec.EmitInstruction(SU); + HazardRec->EmitInstruction(SU); } void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) { // Set isScheduleHigh flag on all SUs that we want to consider first in // pickNode(). - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU); bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup)); SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered); Index: lib/Target/SystemZ/SystemZSubtarget.h =================================================================== --- lib/Target/SystemZ/SystemZSubtarget.h +++ lib/Target/SystemZ/SystemZSubtarget.h @@ -81,6 +81,10 @@ return &TSInfo; } + // Process scheduling regions top-down so that scheduler states can be + // transferred over block boundaries. + bool doMBBSchedRegionsTopDown() const override { return true; } + // This is important for reducing register pressure in vector code. 
bool useAA() const override { return true; } Index: test/CodeGen/SystemZ/int-cmp-48.ll =================================================================== --- test/CodeGen/SystemZ/int-cmp-48.ll +++ test/CodeGen/SystemZ/int-cmp-48.ll @@ -29,8 +29,8 @@ define void @f2(i8 *%src) { ; CHECK-LABEL: f2: ; CHECK: llc [[REG:%r[0-5]]], 0(%r2) -; CHECK: tmll [[REG]], 1 -; CHECK: mvi 0(%r2), 0 +; CHECK-DAG: mvi 0(%r2), 0 +; CHECK-DAG: tmll [[REG]], 1 ; CHECK: ber %r14 ; CHECK: br %r14 entry: