Index: lib/CodeGen/MachineScheduler.cpp =================================================================== --- lib/CodeGen/MachineScheduler.cpp +++ lib/CodeGen/MachineScheduler.cpp @@ -405,6 +405,7 @@ // Initialize the context of the pass. MF = &mf; + MLI = &getAnalysis<MachineLoopInfo>(); PassConfig = &getAnalysis<TargetPassConfig>(); if (VerifyScheduling) Index: lib/Target/SystemZ/SystemZHazardRecognizer.h =================================================================== --- lib/Target/SystemZ/SystemZHazardRecognizer.h +++ lib/Target/SystemZ/SystemZHazardRecognizer.h @@ -19,6 +19,13 @@ // * Processor resources usage. It is beneficial to balance the use of // resources. // +// A goal is to consider all instructions, also those outside of any +// scheduling region. Such instructions are "advanced" past and include +// single instructions before a scheduling region, branches etc. +// +// A block that has only one predecessor continues scheduling with the state +// of it (which may be updated by emitting branches). +// // ===---------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H @@ -35,10 +42,10 @@ namespace llvm { -/// SystemZHazardRecognizer maintains the state during scheduling. +/// SystemZHazardRecognizer maintains the state for one MBB during scheduling. class SystemZHazardRecognizer : public ScheduleHazardRecognizer { - ScheduleDAGMI *DAG; + const SystemZInstrInfo *TII; const TargetSchedModel *SchedModel; /// Keep track of the number of decoder slots used in the current @@ -88,18 +95,28 @@ /// ops, return true if it seems good to schedule an FPd op next. bool isFPdOpPreferred_distance(const SUnit *SU); + /// Wrap a non-scheduled instruction in an SU and emit it. + void emitInstruction(MachineInstr *MI); + + /// Last emitted instruction or nullptr. 
+ MachineInstr *LastEmittedMI; + public: - SystemZHazardRecognizer(const MachineSchedContext *C); + SystemZHazardRecognizer(const SystemZInstrInfo *tii, + const TargetSchedModel *SM) + : TII(tii), SchedModel(SM) { Reset(); } - void setDAG(ScheduleDAGMI *dag) { - DAG = dag; - SchedModel = dag->getSchedModel(); - } - HazardType getHazardType(SUnit *m, int Stalls = 0) override; void Reset() override; void EmitInstruction(SUnit *SU) override; + /// Resolve and cache the scheduling class for an SUnit. + const MCSchedClassDesc *getSchedClass(SUnit *SU) const { + if (!SU->SchedClass && SchedModel->hasInstrSchedModel()) + SU->SchedClass = SchedModel->resolveSchedClass(SU->getInstr()); + return SU->SchedClass; + } + // Cost functions used by SystemZPostRASchedStrategy while // evaluating candidates. @@ -121,6 +138,20 @@ void dumpCurrGroup(std::string Msg = "") const; void dumpProcResourceCounters() const; #endif + + MachineBasicBlock::iterator getLastEmittedMI() { return LastEmittedMI; } + + /// Copy counters from end of single predecessor. + void copyCounters(SystemZHazardRecognizer *Incoming); + + /// Update the scheduler state by emitting (non-scheduled) instructions + /// from I to NextBegin. + void advance(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator NextBegin); + + /// Emit a branch in a predecessor, and return true if it is a taken branch + /// to (current) MBB. + bool emitIncomingBranch(MachineInstr *MI, MachineBasicBlock *MBB); }; } // namespace llvm Index: lib/Target/SystemZ/SystemZHazardRecognizer.cpp =================================================================== --- lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -19,6 +19,13 @@ // * Processor resources usage. It is beneficial to balance the use of // resources. // +// A goal is to consider all instructions, also those outside of any +// scheduling region. 
Such instructions are "advanced" past and include +// single instructions before a scheduling region, branches etc. +// +// A block that has only one predecessor continues scheduling with the state +// of it (which may be updated by emitting branches). +// // ===---------------------------------------------------------------------===// #include "SystemZHazardRecognizer.h" @@ -36,13 +43,9 @@ "resources during scheduling."), cl::init(8)); -SystemZHazardRecognizer:: -SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr), - SchedModel(nullptr) {} - unsigned SystemZHazardRecognizer:: getNumDecoderSlots(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. @@ -73,12 +76,13 @@ clearProcResCounters(); GrpCount = 0; LastFPdOpCycleIdx = UINT_MAX; + LastEmittedMI = nullptr; DEBUG(CurGroupDbg = "";); } bool SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return true; @@ -125,9 +129,9 @@ #ifndef NDEBUG // Debug output void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { OS << "SU(" << SU->NodeNum << "):"; - OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode()); + OS << TII->getName(SU->getInstr()->getOpcode()); - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return; @@ -203,7 +207,7 @@ // Update state with SU as the next scheduled unit. 
void SystemZHazardRecognizer:: EmitInstruction(SUnit *SU) { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); DEBUG( dumpCurrGroup("Decode group before emission");); // If scheduling an SU that must begin a new decoder group, move on @@ -218,8 +222,10 @@ cgd << ", "; dumpSU(SU, cgd);); + LastEmittedMI = SU->getInstr(); + // After returning from a call, we don't know much about the state. - if (SU->getInstr()->isCall()) { + if (SU->isCall) { DEBUG (dbgs() << "+++ Clearing state after call.\n";); clearProcResCounters(); LastFPdOpCycleIdx = UINT_MAX; @@ -271,7 +277,7 @@ } int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; @@ -315,7 +321,7 @@ resourcesCost(SUnit *SU) { int Cost = 0; - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; @@ -335,3 +341,82 @@ return Cost; } +void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI) { + // Make a temporary SUnit. + SUnit SU(MI, 0); + + // Set interesting flags. 
+ SU.isCall = MI->isCall(); + + const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); + for (const MCWriteProcResEntry &PRE : + make_range(SchedModel->getWriteProcResBegin(SC), + SchedModel->getWriteProcResEnd(SC))) { + switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { + case 0: + SU.hasReservedResource = true; + break; + case 1: + SU.isUnbuffered = true; + break; + default: + break; + } + } + + EmitInstruction(&SU); +} + +void SystemZHazardRecognizer:: +copyCounters(SystemZHazardRecognizer *Incoming) { + // Current decoder group + CurrGroupSize = Incoming->CurrGroupSize; + DEBUG (CurGroupDbg = Incoming->CurGroupDbg;); + + // Processor resources + ProcResourceCounters = Incoming->ProcResourceCounters; + CriticalResourceIdx = Incoming->CriticalResourceIdx; + + // FPd + LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; + GrpCount = Incoming->GrpCount; +} + +void SystemZHazardRecognizer:: +advance(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator NextBegin) { + for (; I != NextBegin; ++I) { + if (I->isPosition() || I->isDebugValue()) + continue; + emitInstruction(&*I); + } +} + +bool SystemZHazardRecognizer::emitIncomingBranch(MachineInstr *MI, + MachineBasicBlock *MBB) { + DEBUG (dbgs() << "+++ Emitting incoming branch: "; MI->dump();); + + emitInstruction(MI); + + // Be optimistic and assume that branch prediction will generally do "the + // right thing". + + if (MI->isBranch() && + (TII->getBranchInfo(*MI).Target->isReg() || // Relative branch + TII->getBranchInfo(*MI).Target->getMBB() == MBB)) { + // Taken branch from predecessor + if (CurrGroupSize > 0) + nextGroup(false /*DbgOutput*/); + return true; + } + + assert ((MI->isBranch() || MI->isReturn() || + MI->getOpcode() == SystemZ::CondTrap) && + "Scheduler: expected a branch or conditional return/trap"); + + // NT branches end group after first decoder slot. 
+ if (CurrGroupSize == 2) + nextGroup(false /*DbgOutput*/); + + return false; +} Index: lib/Target/SystemZ/SystemZMachineScheduler.h =================================================================== --- lib/Target/SystemZ/SystemZMachineScheduler.h +++ lib/Target/SystemZ/SystemZMachineScheduler.h @@ -11,7 +11,8 @@ // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into // the MachineScheduler. It has a sorted Available set of SUs and a pickNode() // implementation that looks to optimize decoder grouping and balance the -// usage of processor resources. +// usage of processor resources. Scheduler states are saved for the end +// region of each MBB, so that a successor block can learn from it. //===----------------------------------------------------------------------===// #include "SystemZHazardRecognizer.h" @@ -28,7 +29,14 @@ /// A MachineSchedStrategy implementation for SystemZ post RA scheduling. class SystemZPostRASchedStrategy : public MachineSchedStrategy { - ScheduleDAGMI *DAG; + + const MachineLoopInfo *MLI; + const SystemZInstrInfo *TII; + + // A SchedModel is needed before any DAG is built while advancing past + // non-scheduled instructions, so it would not always be possible to call + // DAG->getSchedClass(SU). + TargetSchedModel SchedModel; /// A candidate during instruction evaluation. struct Candidate { @@ -79,12 +87,52 @@ /// The set of available SUs to schedule next. SUSet Available; - // HazardRecognizer that tracks the scheduler state for the current - // region. - SystemZHazardRecognizer HazardRec; - + /// Current MBB + MachineBasicBlock *MBB; + + /// Since there is no virtual leaveRegion() method, use a pointer to check + /// when scheduler has changed MBB, so that leaveMBB() will be called. 
+ MachineBasicBlock *PreviousMBB; + + // Start and End of current region + MachineBasicBlock::iterator CurrBegin; + MachineBasicBlock::iterator CurrEnd; + + /// Maintain hazard recognizers for all blocks, so that the scheduler state + /// can be maintained past BB boundaries when appropriate. + typedef std::map<MachineBasicBlock*, SystemZHazardRecognizer*> MBB2HazRec; + MBB2HazRec SchedStates; + + /// Pointer to the HazardRecognizer that tracks the scheduler state for + /// the current region. + SystemZHazardRecognizer *HazardRec; + + /// A temporary HazardRecognizer used for regions that are separated (by a + /// call) from the bottom-most region of the MBB. + SystemZHazardRecognizer *TmpHazRec; + + /// Create a HazardRec for the bottom-most region of MBB and save it in + /// SchedStates, and set HazardRec to point to it. For regions above calls, + /// set HazardRec to TmpHazRec instead. + void setupHazardRecForScheduling(); + + /// Continue scheduling from end of single predecessor of Mbb and transfer + /// state into MbbState. + void transferStateFromPred(MachineBasicBlock *Mbb, + SystemZHazardRecognizer *MbbState); + + /// Leave PreviousMbb after scheduling, by emitting all instructions up to + /// but not including the first terminator. + void leavePreviousMBB(); + public: SystemZPostRASchedStrategy(const MachineSchedContext *C); + virtual ~SystemZPostRASchedStrategy(); + + /// Called for a region before scheduling. + void initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) override; /// PostRA scheduling does not track pressure. 
bool shouldTrackPressure() const override { return false; } Index: lib/Target/SystemZ/SystemZMachineScheduler.cpp =================================================================== --- lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -11,7 +11,8 @@ // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into // the MachineScheduler. 
It has a sorted Available set of SUs and a pickNode() // implementation that looks to optimize decoder grouping and balance the -// usage of processor resources. +// usage of processor resources. Scheduler states are saved for the end +// region of each MBB, so that a successor block can learn from it. //===----------------------------------------------------------------------===// #include "SystemZMachineScheduler.h" @@ -34,14 +35,146 @@ } #endif +// Try to find a single predecessor that would be interesting for the +// scheduler in the top-most region of MBB. +static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB, + const MachineLoop *Loop) { + MachineBasicBlock *PredMBB = nullptr; + if (MBB->pred_size() == 1) + PredMBB = *MBB->pred_begin(); + + // The loop header has two predecessors, return the latch, but not for a + // single block loop. + if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) { + for (auto I = MBB->pred_begin(); I != MBB->pred_end(); ++I) + if (Loop->contains(*I)) + PredMBB = (*I == MBB ? nullptr : *I); + } + + assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB)) + && "Loop MBB should not consider predecessor outside of loop."); + + return PredMBB; +} + +void SystemZPostRASchedStrategy::setupHazardRecForScheduling() { + // We can maintain the scheduler state perfectly even when scheduling regions + // in reverse order (bottom-up) in MBB, because the only scheduling + // boundaries we have are calls, which simply reset the state. + if (SchedStates.find(MBB) != SchedStates.end()) { + // Use the temporary HazardRecognizer for any regions above calls. + DEBUG (dbgs() << "+++ Continuing in MBB#" << MBB->getNumber()); + TmpHazRec->Reset(); + HazardRec = TmpHazRec; + } else { + // First time in MBB is the bottom-most region. This is the state that we + // want to save for use by successor block. 
+ DEBUG (dbgs() << "+++ Entering MBB#" << MBB->getNumber()); + HazardRec = new SystemZHazardRecognizer(TII, &SchedModel); + SchedStates[MBB] = HazardRec; + } + + DEBUG (const MachineLoop *Loop = MLI->getLoopFor(MBB); + if(Loop && Loop->getHeader() == MBB) + dbgs() << " (Loop header)"; + dbgs() << ":\n";); +} + +// Mbb can be MBB or PreviousMBB. MbbState can be either a SchedStates +// entry or TmpHazRec. +void SystemZPostRASchedStrategy:: +transferStateFromPred(MachineBasicBlock *Mbb, + SystemZHazardRecognizer *MbbState) { + MachineBasicBlock *SinglePredMBB = + getSingleSchedPred(Mbb, MLI->getLoopFor(Mbb)); + if (SinglePredMBB == nullptr || + SchedStates.find(SinglePredMBB) == SchedStates.end()) + return; + + DEBUG (dbgs() << "+++ Continued scheduling from MBB#" + << SinglePredMBB->getNumber() << "\n";); + + MbbState->copyCounters(SchedStates[SinglePredMBB]); + + // Emit incoming terminator(s). + for (MachineBasicBlock::iterator I = SinglePredMBB->getFirstTerminator(); + I != SinglePredMBB->end(); I++) + if (MbbState->emitIncomingBranch(&*I, Mbb)) + break; +} + +void SystemZPostRASchedStrategy::leavePreviousMBB() { + MachineBasicBlock::iterator I; + + DEBUG (dbgs() << "+++ Leaving MBB#" << PreviousMBB->getNumber() << "\n";); + + if (SchedStates.find(PreviousMBB) == SchedStates.end()) { + // No scheduling done. Take state from predecessor if possible and then + // emit all instructions. + SchedStates[PreviousMBB] = new SystemZHazardRecognizer(TII, &SchedModel); + transferStateFromPred(PreviousMBB, SchedStates[PreviousMBB]); + I = PreviousMBB->begin(); + } else + // If scheduling was done, emit everything after the region. + I = std::next(SchedStates[PreviousMBB]->getLastEmittedMI()); + + // Advance to first terminator. The successor block will handle them in + // dependent on CFG layout (T/NT branch etc). 
+ SchedStates[PreviousMBB]->advance(I, PreviousMBB->getFirstTerminator()); +} + SystemZPostRASchedStrategy:: SystemZPostRASchedStrategy(const MachineSchedContext *C) - : DAG(nullptr), HazardRec(C) {} + : MLI(C->MLI), + TII(static_cast<const SystemZInstrInfo *> + (C->MF->getSubtarget().getInstrInfo())), + MBB(nullptr), PreviousMBB(nullptr), CurrBegin(nullptr), + CurrEnd(nullptr), HazardRec(nullptr) { + const TargetSubtargetInfo *ST = &C->MF->getSubtarget(); + SchedModel.init(ST->getSchedModel(), ST, TII); + TmpHazRec = new SystemZHazardRecognizer(TII, &SchedModel); +} + +SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() { + // Delete hazard recognizers kept around for each MBB. + for (auto I : SchedStates) { + SystemZHazardRecognizer *hazrec = I.second; + delete hazrec; + } + delete TmpHazRec; +} + +void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + MBB = Begin->getParent(); + CurrBegin = Begin; + CurrEnd = End; + + // If a new MBB has been entered, finalize the previous MBB. + if (PreviousMBB != nullptr && PreviousMBB != MBB) + leavePreviousMBB(); + PreviousMBB = MBB; +} void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) { - DAG = dag; - HazardRec.setDAG(dag); - HazardRec.Reset(); + setupHazardRecForScheduling(); + + // There may be non-scheduled instructions before start of region. Look + // backwards until beginning of block or a call. + MachineBasicBlock::iterator PreRegBegin = CurrBegin; + for (; PreRegBegin != MBB->begin(); --PreRegBegin) { + if (std::prev(PreRegBegin)->isCall()) + break; + } + + // If this is top-most in MBB, try to take over the state from a single + // predecessor, if it has been scheduled. + if (PreRegBegin == MBB->begin()) + transferStateFromPred(MBB, HazardRec); + + // Emit any instructions before start of region. + HazardRec->advance(PreRegBegin, CurrBegin); } // Pick the next node to schedule. 
@@ -55,25 +188,25 @@ // If only one choice, return it. 
if (Available.size() == 1) { DEBUG (dbgs() << "+++ Only one: "; - HazardRec.dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";); + HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";); return *Available.begin(); } // All nodes that are possible to schedule are stored by in the // Available set. - DEBUG(dbgs() << "+++ Available: "; Available.dump(HazardRec);); + DEBUG(dbgs() << "+++ Available: "; Available.dump(*HazardRec);); Candidate Best; for (auto *SU : Available) { // SU is the next candidate to be compared against current Best. - Candidate c(SU, HazardRec); + Candidate c(SU, *HazardRec); // Remeber which SU is the best candidate. if (Best.SU == nullptr || c < Best) { Best = c; DEBUG(dbgs() << "+++ Best sofar: "; - HazardRec.dumpSU(Best.SU, dbgs()); + HazardRec->dumpSU(Best.SU, dbgs()); if (Best.GroupingCost != 0) dbgs() << "\tGrouping cost:" << Best.GroupingCost; if (Best.ResourcesCost != 0) @@ -138,13 +271,13 @@ // Remove SU from Available set and update HazardRec. Available.erase(SU); - HazardRec.EmitInstruction(SU); + HazardRec->EmitInstruction(SU); } void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) { // Set isScheduleHigh flag on all SUs that we want to consider first in // pickNode(). - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU); bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup)); SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered); Index: test/CodeGen/SystemZ/int-cmp-48.ll =================================================================== --- test/CodeGen/SystemZ/int-cmp-48.ll +++ test/CodeGen/SystemZ/int-cmp-48.ll @@ -29,8 +29,8 @@ define void @f2(i8 *%src) { ; CHECK-LABEL: f2: ; CHECK: llc [[REG:%r[0-5]]], 0(%r2) -; CHECK: tmll [[REG]], 1 -; CHECK: mvi 0(%r2), 0 +; CHECK-DAG: mvi 0(%r2), 0 +; CHECK-DAG: tmll [[REG]], 1 ; CHECK: ber %r14 ; CHECK: br %r14 entry: