Index: lib/Target/SystemZ/SystemZHazardRecognizer.h
===================================================================
--- lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -47,7 +47,9 @@
 #ifndef NDEBUG
   const SystemZInstrInfo *TII;
+  std::string getSideSteerResourceName(unsigned ID) const;
 #endif
+  const TargetRegisterInfo *TRI;
   const TargetSchedModel *SchedModel;
 
   /// Keep track of the number of decoder slots used in the current
@@ -80,9 +82,45 @@
   /// the cycle index of the next group.
   unsigned getCurrCycleIdx(SUnit *SU = nullptr) const;
 
-  /// LastFPdOpCycleIdx stores the numbeer returned by getCurrCycleIdx()
-  /// when a stalling operation is scheduled (which uses the FPd resource).
-  unsigned LastFPdOpCycleIdx;
+  /// Map from a side steered resource to its previous cycle index. This is
+  /// the number returned by getCurrCycleIdx(), and is used when the next SU
+  /// using the same resource is to be scheduled preferably either on same
+  /// (FXU) or opposite (FPd) side.
+  std::vector<unsigned> SideSteerIndexes;
+
+  /// Each physical register number is its own side steer ID, so the next ID
+  /// gets the next higher number.
+  unsigned const FPdID = SystemZ::NUM_TARGET_REGS;
+
+  /// Given SU, returns an index into the SideSteerIndexes vector, or
+  /// UINT_MAX if SU does not touch a side steered resource. This could be a
+  /// defined register or a used functional unit.
+  unsigned getSideSteeredResourceID(const SUnit *SU) const;
+
+  /// GroupOffsets[1-2] are set to true if it is possible that during
+  /// scheduling the current decoder group may actually be offset with one
+  /// or two instructions. For example, if MBB has two predecessors, one of
+  /// them is branching, and the first instruction in MBB is therefore at
+  /// cycle 0. The linear predecessor falls through into MBB, with one
+  /// instruction scheduled in the decoder group. This means that at the
+  /// beginning of MBB, the cycle index is either at 0 or 1. This is
+  /// modelled by setting GroupOffsets[1] to true. The cycle index of the
+  /// first SU is always 0, and the scheduling state is always as seen as
+  /// without any alternative groupings. These offsets are merely a means to
+  /// know how the state may alternatively look like.
+  bool GroupOffsets[3];
+
+  /// Returns true if it is *guaranteed* that the side steered resource given
+  /// by ID was scheduled on the same processor side (or opposite if SameSide
+  /// is false) as SU will be placed on.
+  bool checkSide(unsigned ID, bool SameSide, SUnit *SU) const;
+
+  /// This is called when a "grouper" is emitted, meaning it will by itself
+  /// cause a new decode group to begin or end. After that instruction is
+  /// scheduled there are no grouping offsets. The SideSteerIndexes map is
+  /// also cleared, but only if there were any offsets to begin with, or
+  /// DoReset is true.
+  void resetSideSteering(bool DoReset = false);
 
   /// A counter of decoder groups scheduled.
   unsigned GrpCount;
@@ -104,12 +142,15 @@
 
 public:
   SystemZHazardRecognizer(const SystemZInstrInfo *tii,
+                          const TargetRegisterInfo *tri,
                           const TargetSchedModel *SM) :
 #ifndef NDEBUG
     TII(tii),
 #endif
+    TRI(tri),
     SchedModel(SM) {
+    SideSteerIndexes.resize(SystemZ::NUM_TARGET_REGS + 1/*FPd*/, UINT_MAX);
     Reset();
   }
 
@@ -141,12 +182,16 @@
   /// a negative value means it would be good to schedule SU next.
   int resourcesCost(SUnit *SU);
 
+  int bypassCost(SUnit *SU) const;
+
 #ifndef NDEBUG
   // Debug dumping.
   std::string CurGroupDbg; // current group as text
   void dumpSU(SUnit *SU, raw_ostream &OS) const;
   void dumpCurrGroup(std::string Msg = "") const;
   void dumpProcResourceCounters() const;
+  void dumpSideIndexes(std::string Msg = "") const;
+  void dumpGroupOffsets() const;
   void dumpState() const;
 #endif
 
@@ -154,6 +199,15 @@
 
   /// Copy counters from end of single predecessor.
   void copyState(SystemZHazardRecognizer *Incoming);
+
+  /// At the beginning of a block with multiple predecessors, recompute the
+  /// state so that the current cycle index is 0. This is already correct for
+  /// all the branching predecessors, but the linear one may e.g. fall-through
+  /// into this block. At the point of calling this, it is expected that the
+  /// current state reflects that of the linear predecessor at the point of
+  /// entering the new block. The only information left after this are the
+  /// possible group offsets, all else is cleared.
+  void normalize();
 };
 
 } // namespace llvm
Index: lib/Target/SystemZ/SystemZHazardRecognizer.cpp
===================================================================
--- lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -77,6 +77,99 @@
   return Idx;
 }
 
+static bool isFXUReg(const MachineOperand &MO) {
+  return (SystemZ::GRX32BitRegClass.contains(MO.getReg()) ||
+          SystemZ::GR64BitRegClass.contains(MO.getReg()) ||
+          SystemZ::GR128BitRegClass.contains(MO.getReg()));
+}
+
+unsigned SystemZHazardRecognizer::
+getSideSteeredResourceID(const SUnit *SU) const {
+  if (SU->isUnbuffered)
+    return FPdID;
+
+  const MachineInstr *MI = SU->getInstr();
+  if (MI->getNumOperands()) {
+    const MachineOperand &MO = MI->getOperand(0);
+    if (MO.isReg() && MO.isDef() && !MO.isImplicit() && isFXUReg(MO))
+      return MO.getReg();
+  }
+
+  return UINT_MAX;
+}
+
+// EXPERIMENTAL
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> DOGROUPS("dogroups", cl::init(false));
+static cl::opt<bool> NOSIDESTEERRESET("nosidesteerreset", cl::init(false));
+
+bool SystemZHazardRecognizer::
+checkSide(unsigned ID, bool SameSide, SUnit *SU) const {
+  unsigned PrevIdx = SideSteerIndexes[ID];
+  if (PrevIdx == UINT_MAX)
+    return false;
+  unsigned CurrIdx = getCurrCycleIdx(SU);
+
+  bool CurrLow = CurrIdx < 3;
+  bool DO = (!GroupOffsets[1] && !GroupOffsets[2]);
+  if (DOGROUPS)
+    DO = true;
+  if (DO) {
+    bool PrevLow = PrevIdx < 3;
+    return ((SameSide && (CurrLow == PrevLow)) ||
+            (!SameSide && (CurrLow != PrevLow)));
+  }
+
+  if (GroupOffsets[1] != GroupOffsets[2]) {
+    // One offset: It is possible to check for the group of two slots in each
+    // group overlap (Cycle indexes go left to right):
+    //          [o o -|o o -]           [- o o|- o o]
+    //   Offs:1 [- o o|- o o]    Offs:2 [o o -|o o -]
+    // For example, with Offs:1 (left) index 0 and 1 form a group which is
+    // sure to work out in all cases as "same side", and also as "opposite
+    // side" with index 3 and 4.
+
+    // Build a mask for CurrIdx on the side it's on.
+    unsigned CurrMask = 0;
+    unsigned M = GroupOffsets[1] ? 3/*0b011*/ : 6/*0b110*/;
+    CurrMask = CurrLow ? M : M << 3;
+
+    // Build mask for PrevIdx.
+    unsigned OppSideMask = CurrLow ? (CurrMask << 3) : (CurrMask >> 3);
+    unsigned PrevMask = SameSide ? CurrMask : OppSideMask;
+
+    if (((1 << CurrIdx) & CurrMask) && ((1 << PrevIdx) & PrevMask))
+      return true;
+  }
+
+  // Regardless of group offsets, it is always possible to check for a
+  // matching exact distance.
+  unsigned Distance = std::abs((int) CurrIdx - (int) PrevIdx);
+  return ((SameSide && Distance == 0) || (!SameSide && Distance == 3));
+}
+
+void SystemZHazardRecognizer::resetSideSteering(bool DoReset) {
+  // If there were no offsets before this, then SideSteerIndexes is still
+  // valid.
+  bool Any = (GroupOffsets[1] || GroupOffsets[2]);
+  if (!Any && !DoReset)
+    return;
+
+  GroupOffsets[0] = true; // always true
+  GroupOffsets[1] = false;
+  GroupOffsets[2] = false;
+
+  if (NOSIDESTEERRESET && !DoReset)
+    return;
+
+  // SideSteerIndexes become invalid in presence of offsets since the grouper
+  // may end up at a different cycle index depending on how many SUs were
+  // already in current group.
+ SideSteerIndexes.assign(SideSteerIndexes.size(), UINT_MAX); + if (!DoReset) + DEBUG(dbgs() << "++ Side steering reset\n";); +} + ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer:: getHazardType(SUnit *m, int Stalls) { return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard); @@ -86,8 +179,8 @@ CurrGroupSize = 0; clearProcResCounters(); GrpCount = 0; - LastFPdOpCycleIdx = UINT_MAX; LastEmittedMI = nullptr; + resetSideSteering(true/*DoReset*/); DEBUG(CurGroupDbg = "";); } @@ -171,6 +264,17 @@ OS << "/EndsGroup"; if (SU->isUnbuffered) OS << "/Unbuffered"; + + unsigned ID = getSideSteeredResourceID(SU); + if (ID != UINT_MAX) { + OS << "/" << getSideSteerResourceName(ID); + if (ID < SystemZ::NUM_TARGET_REGS) + OS << ":d"; + } + + for (const MachineOperand &MO : SU->getInstr()->uses()) + if (MO.isReg() && !MO.isImplicit() && isFXUReg(MO)) + OS << "/" << TRI->getName(MO.getReg()) << ":u"; } void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { @@ -212,15 +316,67 @@ << "\n"; } +void SystemZHazardRecognizer:: +dumpSideIndexes(std::string Msg) const { + bool any = false; + for (unsigned I : SideSteerIndexes) + if (I != UINT_MAX) { + any = true; + break; + } + if (!any) + return; + + dbgs() << "++ | " << Msg; + bool First = true; + unsigned ColCount = 0, RowCount = 0; + for (unsigned i = 0; i < SideSteerIndexes.size() ; i++) { + if (SideSteerIndexes[i] == UINT_MAX) + continue; + if (!First) + dbgs() << ", "; + else + First = false; + dbgs() << getSideSteerResourceName(i) << ":" << SideSteerIndexes[i]; + if ((++ColCount == 8 && !RowCount) || (ColCount == 13 && RowCount)) { + dbgs() << ",\n++ | "; + ColCount = 0; + RowCount++; + First = true; + } + } + dbgs() << ".\n"; +} + +void SystemZHazardRecognizer::dumpGroupOffsets() const { + bool any = (GroupOffsets[1] || GroupOffsets[2]); + if (!any) + return; + + dbgs() << "++ | Possible group offsets:"; + if (GroupOffsets[1]) + dbgs() << " [1]"; + if (GroupOffsets[2]) + dbgs() << " [2]"; + dbgs() << "\n"; +} 
+ void SystemZHazardRecognizer::dumpState() const { dumpCurrGroup("| Current decoder group"); dbgs() << "++ | Current cycle index: " << getCurrCycleIdx() << "\n"; dumpProcResourceCounters(); - if (LastFPdOpCycleIdx != UINT_MAX) - dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n"; + dumpSideIndexes("Last side steered resource cycle indexes: "); + dumpGroupOffsets(); } +std::string SystemZHazardRecognizer:: +getSideSteerResourceName(unsigned ID) const { + assert (ID < SideSteerIndexes.size() && "Invalid ID"); + if (ID == FPdID) + return "FPd"; + return TRI->getName(ID); +} #endif //NDEBUG void SystemZHazardRecognizer::clearProcResCounters() { @@ -284,11 +440,29 @@ } } - // Make note of an instruction that uses a blocking resource (FPd). - if (SU->isUnbuffered) { - LastFPdOpCycleIdx = getCurrCycleIdx(SU); - DEBUG(dbgs() << "++ Last FPd cycle index: " - << LastFPdOpCycleIdx << "\n";); + if (SC->BeginGroup || SC->EndGroup) + resetSideSteering(); + + // Remember the cycle index of SU for its side steered resource. + unsigned ID = getSideSteeredResourceID(SU); + if (ID != UINT_MAX) { + if (ID < SystemZ::NUM_TARGET_REGS) { + unsigned NumUsers = 0; + // If there are more than two users of this def, don't bother. + for (const SDep &SuccDep : SU->Succs) + if (SuccDep.getKind() == SDep::Data && + TRI->regsOverlap(ID, SuccDep.getReg())) + if (++NumUsers > 2) + break; + + if (NumUsers && NumUsers <= 2) { + for (MCSubRegIterator SubRegs(ID, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + SideSteerIndexes[*SubRegs] = getCurrCycleIdx(SU); + } + } + else + SideSteerIndexes[ID] = getCurrCycleIdx(SU); } // Insert SU into current group by increasing number of slots used @@ -331,17 +505,11 @@ bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const { assert (SU->isUnbuffered); - // If this is the first FPd op, it should be scheduled high. 
-  if (LastFPdOpCycleIdx == UINT_MAX)
-    return true;
-  // If this is not the first PFd op, it should go into the other side
-  // of the processor to use the other FPd unit there. This should
-  // generally happen if two FPd ops are placed with 2 other
-  // instructions between them (modulo 6).
-  unsigned SUCycleIdx = getCurrCycleIdx(SU);
-  if (LastFPdOpCycleIdx > SUCycleIdx)
-    return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
-  return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
+  // If this is the first FPd op it should be scheduled high, otherwise it
+  // should preferably go to the other processor side and so use the
+  // alternate FPd unit.
+  return ((SideSteerIndexes[FPdID] == UINT_MAX) ||
+          checkSide(FPdID, false/*SameSide*/, SU));
 }
 
 int SystemZHazardRecognizer::
@@ -368,6 +536,44 @@
   return Cost;
 }
 
+int SystemZHazardRecognizer::bypassCost(SUnit *SU) const {
+  MachineInstr *MI = SU->getInstr();
+
+  // Put FXU register use on same side as previous def.
+  unsigned RegsSameSide = 0, RegsOppositeSide = 0;
+  for (const MachineOperand &MO : MI->uses())
+    if (MO.isReg() && !MO.isImplicit() && isFXUReg(MO)) {
+      if (checkSide(MO.getReg(), true/*SameSide*/, SU))
+        RegsSameSide++;
+      else if (checkSide(MO.getReg(), false/*SameSide*/, SU))
+        RegsOppositeSide++;
+    }
+
+  if (RegsSameSide > RegsOppositeSide)
+    return -2;
+  if (RegsOppositeSide > RegsSameSide)
+    return 2;
+
+  // Give an immediate user a chance into the same decoder group.
+  unsigned DefedFXUReg = getSideSteeredResourceID(SU);
+  if (DefedFXUReg < SystemZ::NUM_TARGET_REGS) {
+    unsigned CurrIdx = getCurrCycleIdx(SU);
+    if (!(CurrIdx == 2 || CurrIdx == 5))
+      return 0;
+    // If CurrIdx is the last slot in group and SU has a successor that is
+    // only waiting for DefedFXUReg, return a positive cost.
+ for (SDep &SuccDep : SU->Succs) { + SUnit *SuccSU = SuccDep.getSUnit(); + if (SuccDep.getKind() == SDep::Data && + TRI->regsOverlap(SuccDep.getReg(), DefedFXUReg) && + SuccSU->NumPredsLeft == 1 && !SuccSU->isBoundaryNode()) + return 1; + } + } + + return 0; +} + void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, bool TakenBranch) { // Make a temporary SUnit. @@ -395,14 +601,32 @@ unsigned GroupSizeBeforeEmit = CurrGroupSize; EmitInstruction(&SU); + // Handle Not Taken branch (linear predecessor) if (!TakenBranch && isBranchRetTrap(MI)) { // NT Branch on second slot ends group. if (GroupSizeBeforeEmit == 1) nextGroup(); + + bool Offs[3] = {true, GroupOffsets[1], GroupOffsets[2]}; + resetSideSteering(); + // If any group offsets are present, they must be recomputed based on + // where the NT branch ends up: + // Size before: 0 1 2 + // [B - -] [- B|- - -] [- - B|- - -] + // Offs:1 [- B|- - -] [- - B|- - -] [- - -|B - -] + // Offs:2 [- - B|- - -] [- - -|B - -] [- - -|- B|- - -] + if (GroupSizeBeforeEmit == 0 && (Offs[1] || Offs[2])) + GroupOffsets[2] = true; + else if (GroupSizeBeforeEmit == 1 && Offs[2]) + GroupOffsets[1] = true; + else if (GroupSizeBeforeEmit == 2 && Offs[1]) + GroupOffsets[1] = true; } - if (TakenBranch && CurrGroupSize > 0) + if (TakenBranch) { + resetSideSteering(); nextGroup(); + } assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && "Scheduler: unhandled terminator!"); @@ -418,7 +642,28 @@ ProcResourceCounters = Incoming->ProcResourceCounters; CriticalResourceIdx = Incoming->CriticalResourceIdx; - // FPd - LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; GrpCount = Incoming->GrpCount; + + // Side steering + SideSteerIndexes = Incoming->SideSteerIndexes; + GroupOffsets[1] = Incoming->GroupOffsets[1]; + GroupOffsets[2] = Incoming->GroupOffsets[2]; +} + +void SystemZHazardRecognizer::normalize() { + // Recompute the offsets after Reset(): + // Offs:0 [- - -] [X - -] [X X -] + // After reset: Offs:0 Offs:1 Offs:2 + + // Offs:1 [X 
- -] [X X -] [X X X] + // After reset: Offs:1 Offs:2 Offs:0 + + // Offs:2 [X X -] [X X X] [X X X|X - -] + // After reset: Offs:2 Offs:0 Offs:1 + bool Offsets[3] = {true, GroupOffsets[1], GroupOffsets[2]}; + unsigned Size = CurrGroupSize; + Reset(); + for (unsigned O = 0; O <= 2; ++O) + if (Offsets[O]) + GroupOffsets[(Size + O) % 3] = true; } Index: lib/Target/SystemZ/SystemZMachineScheduler.h =================================================================== --- lib/Target/SystemZ/SystemZMachineScheduler.h +++ lib/Target/SystemZ/SystemZMachineScheduler.h @@ -32,6 +32,7 @@ const MachineLoopInfo *MLI; const SystemZInstrInfo *TII; + const TargetRegisterInfo *TRI; // A SchedModel is needed before any DAG is built while advancing past // non-scheduled instructions, so it would not always be possible to call @@ -48,6 +49,9 @@ /// The processor resources cost. int ResourcesCost = 0; + /// The (negative) cost of side steering of GR registers. + int BypassCost = 0; + Candidate() = default; Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec); @@ -55,8 +59,9 @@ bool operator<(const Candidate &other); // Check if this node is free of cost ("as good as any"). 
- bool noCost() const { - return (GroupingCost <= 0 && !ResourcesCost); + bool noCost(bool HeightCutOff) const { + return (GroupingCost <= 0 && !ResourcesCost && + (BypassCost == -2 || HeightCutOff)); } #ifndef NDEBUG @@ -65,6 +70,8 @@ dbgs() << " Grouping cost:" << GroupingCost; if (ResourcesCost != 0) dbgs() << " Resource cost:" << ResourcesCost; + if (BypassCost != 0) + dbgs() << " BypassCost:" << BypassCost; } #endif }; Index: lib/Target/SystemZ/SystemZMachineScheduler.cpp =================================================================== --- lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -16,6 +16,7 @@ //===----------------------------------------------------------------------===// #include "SystemZMachineScheduler.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -81,41 +82,68 @@ DEBUG(dbgs() << "** Entering " << printMBBReference(*NextMBB)); MBB = NextMBB; + const MachineLoop *Loop = MLI->getLoopFor(MBB); /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to /// point to it. - HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel); - DEBUG(const MachineLoop *Loop = MLI->getLoopFor(MBB); - if(Loop && Loop->getHeader() == MBB) + HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, TRI, + &SchedModel); + DEBUG(if(Loop && Loop->getHeader() == MBB) dbgs() << " (Loop header)"; dbgs() << ":\n";); // Try to take over the state from a single predecessor, if it has been - // scheduled. If this is not possible, we are done. - MachineBasicBlock *SinglePredMBB = - getSingleSchedPred(MBB, MLI->getLoopFor(MBB)); - if (SinglePredMBB == nullptr || - SchedStates.find(SinglePredMBB) == SchedStates.end()) - return; - - DEBUG(dbgs() << "** Continued scheduling from " - << printMBBReference(*SinglePredMBB) << "\n";); + // scheduled. 
+ MachineBasicBlock *SinglePredMBB = getSingleSchedPred(MBB, Loop); + bool ContinuedSched = (SinglePredMBB != nullptr && + SchedStates.find(SinglePredMBB) != SchedStates.end()); + MachineBasicBlock *PredMBB = nullptr; + if (ContinuedSched) { + PredMBB = SinglePredMBB; + DEBUG(dbgs() << "** Continued scheduling from " + << printMBBReference(*SinglePredMBB) << "\n";); + } + else { + if (MBB->pred_size() == 0) + return; + PredMBB = &*std::prev(MBB->getIterator()); + assert (SchedStates.find(PredMBB) != SchedStates.end() && + "Expected linear predecessor to be scheduled before current MBB."); + if (!MBB->isPredecessor(PredMBB) || + (Loop != nullptr && !Loop->contains(PredMBB))) + return; + DEBUG(dbgs() << "** Linear predecessor is " + << printMBBReference(*PredMBB) << "\n";); + } - HazardRec->copyState(SchedStates[SinglePredMBB]); + HazardRec->copyState(SchedStates[PredMBB]); DEBUG(HazardRec->dumpState();); + bool Change = false; // Emit incoming terminator(s). Be optimistic and assume that branch // prediction will generally do "the right thing". - for (MachineBasicBlock::iterator I = SinglePredMBB->getFirstTerminator(); - I != SinglePredMBB->end(); I++) { + for (MachineBasicBlock::iterator I = PredMBB->getFirstTerminator(); + I != PredMBB->end(); I++) { DEBUG(dbgs() << "** Emitting incoming branch: "; I->dump();); bool TakenBranch = (I->isBranch() && (TII->getBranchInfo(*I).Target->isReg() || // Relative branch TII->getBranchInfo(*I).Target->getMBB() == MBB)); HazardRec->emitInstruction(&*I, TakenBranch); + Change = true; if (TakenBranch) break; } + + // If there are multiple predecessors, we are only interested in the + // possible group offsets, which normalize() will compute. 
+  if (!ContinuedSched) {
+    DEBUG(dbgs() << "** Normalizing scheduler state at beginning of block.\n";);
+    HazardRec->normalize();
+    Change = true;
+  }
+
+  if (Change)
+    DEBUG(HazardRec->dumpState());
 }
 
 void SystemZPostRASchedStrategy::leaveMBB() {
@@ -131,6 +159,7 @@
   : MLI(C->MLI),
     TII(static_cast<const SystemZInstrInfo *>
         (C->MF->getSubtarget().getInstrInfo())),
+    TRI(C->MF->getRegInfo().getTargetRegisterInfo()),
     MBB(nullptr), HazardRec(nullptr) {
   const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
   SchedModel.init(ST->getSchedModel(), ST, TII);
@@ -155,6 +184,11 @@
   advanceTo(Begin);
 }
 
+// EXPERIMENTAL
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> SIDESTEERING_FXU("sidesteer-fxu", cl::init(false));
+static cl::opt<unsigned> FXU_HEIGHTDIFF("fxu-heightdiff", cl::init(5));
+
 // Pick the next node to schedule.
 SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
   // Only scheduling top-down.
@@ -193,7 +227,9 @@
 
     // Once we know we have seen all SUs that affect grouping or use unbuffered
    // resources, we can stop iterating if Best looks good.
-    if (!SU->isScheduleHigh && Best.noCost())
+    unsigned HeightDiff =
+      std::abs(((int) Best.SU->getHeight()) - ((int) SU->getHeight()));
+    if (!SU->isScheduleHigh && Best.noCost(HeightDiff >= FXU_HEIGHTDIFF))
       break;
   }
 
@@ -212,6 +248,10 @@
 
   // Check the resources cost for this SU.
   ResourcesCost = HazardRec.resourcesCost(SU);
+
+  // Side steering
+  if (SIDESTEERING_FXU)
+    BypassCost = HazardRec.bypassCost(SU);
 }
 
 bool SystemZPostRASchedStrategy::Candidate::
@@ -229,6 +269,16 @@
   if (ResourcesCost > other.ResourcesCost)
     return false;
 
+  // Try to help FXU bypassing.
+  unsigned HeightDiff =
+    std::abs(((int) SU->getHeight()) - ((int) other.SU->getHeight()));
+  if (HeightDiff < FXU_HEIGHTDIFF) {
+    if (BypassCost < other.BypassCost)
+      return true;
+    if (BypassCost > other.BypassCost)
+      return false;
+  }
+
   // Higher SU is otherwise generally better.
   if (SU->getHeight() > other.SU->getHeight())
     return true;