diff --git a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index 3fc25034dded..9bee5e8d1864 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -1,260 +1,261 @@
 //-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // -------------------------- Post RA scheduling ---------------------------- //
 // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into
 // the MachineScheduler. It has a sorted Available set of SUs and a pickNode()
 // implementation that looks to optimize decoder grouping and balance the
 // usage of processor resources. Scheduler states are saved for the end
 // region of each MBB, so that a successor block can learn from it.
 //===----------------------------------------------------------------------===//
 
 #include "SystemZMachineScheduler.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 
 using namespace llvm;
 
 #define DEBUG_TYPE "machine-scheduler"
 
 #ifndef NDEBUG
 // Print the set of SUs
 void SystemZPostRASchedStrategy::SUSet::
 dump(SystemZHazardRecognizer &HazardRec) const {
   dbgs() << "{";
   for (auto &SU : *this) {
     HazardRec.dumpSU(SU, dbgs());
     if (SU != *rbegin())
       dbgs() << ", ";
   }
   dbgs() << "}\n";
 }
 #endif
 
 // Try to find a single predecessor that would be interesting for the
 // scheduler in the top-most region of MBB.
 static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB,
                                              const MachineLoop *Loop) {
   MachineBasicBlock *PredMBB = nullptr;
   if (MBB->pred_size() == 1)
     PredMBB = *MBB->pred_begin();
 
   // The loop header has two predecessors, return the latch, but not for a
   // single block loop.
   if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) {
     for (auto I = MBB->pred_begin(); I != MBB->pred_end(); ++I)
       if (Loop->contains(*I))
         PredMBB = (*I == MBB ? nullptr : *I);
   }
 
   assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB)) &&
           "Loop MBB should not consider predecessor outside of loop.");
 
   return PredMBB;
 }
 
 void SystemZPostRASchedStrategy::
 advanceTo(MachineBasicBlock::iterator NextBegin) {
   MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI();
   MachineBasicBlock::iterator I =
     ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ?
      std::next(LastEmittedMI) : MBB->begin());
 
   for (; I != NextBegin; ++I) {
     if (I->isPosition() || I->isDebugInstr())
       continue;
     HazardRec->emitInstruction(&*I);
   }
 }
 
 void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) {
+  Available.clear(); // -misched-cutoff may leave SUs behind from a previous region.
   LLVM_DEBUG(HazardRec->dumpState(););
 }
 
 void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) {
   assert ((SchedStates.find(NextMBB) == SchedStates.end()) &&
           "Entering MBB twice?");
   LLVM_DEBUG(dbgs() << "** Entering " << printMBBReference(*NextMBB));
 
   MBB = NextMBB;
 
   /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to
   /// point to it.
   HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
   LLVM_DEBUG(const MachineLoop *Loop = MLI->getLoopFor(MBB);
              if (Loop && Loop->getHeader() == MBB) dbgs() << " (Loop header)";
              dbgs() << ":\n";);
 
   // Try to take over the state from a single predecessor, if it has been
   // scheduled. If this is not possible, we are done.
   MachineBasicBlock *SinglePredMBB =
     getSingleSchedPred(MBB, MLI->getLoopFor(MBB));
   if (SinglePredMBB == nullptr ||
       SchedStates.find(SinglePredMBB) == SchedStates.end())
     return;
 
   LLVM_DEBUG(dbgs() << "** Continued scheduling from "
                     << printMBBReference(*SinglePredMBB) << "\n";);
 
   HazardRec->copyState(SchedStates[SinglePredMBB]);
   LLVM_DEBUG(HazardRec->dumpState(););
 
   // Emit incoming terminator(s). Be optimistic and assume that branch
   // prediction will generally do "the right thing".
   for (MachineBasicBlock::iterator I = SinglePredMBB->getFirstTerminator();
        I != SinglePredMBB->end(); I++) {
     LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; I->dump(););
     bool TakenBranch = (I->isBranch() &&
                         (TII->getBranchInfo(*I).isIndirect() ||
                          TII->getBranchInfo(*I).getMBBTarget() == MBB));
     HazardRec->emitInstruction(&*I, TakenBranch);
     if (TakenBranch)
       break;
   }
 }
 
 void SystemZPostRASchedStrategy::leaveMBB() {
   LLVM_DEBUG(dbgs() << "** Leaving " << printMBBReference(*MBB) << "\n";);
 
   // Advance to first terminator. The successor block will handle terminators
   // dependent on CFG layout (T/NT branch etc).
   advanceTo(MBB->getFirstTerminator());
 }
 
 SystemZPostRASchedStrategy::
 SystemZPostRASchedStrategy(const MachineSchedContext *C)
   : MLI(C->MLI),
     TII(static_cast<const SystemZInstrInfo *>
         (C->MF->getSubtarget().getInstrInfo())),
     MBB(nullptr), HazardRec(nullptr) {
   const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
   SchedModel.init(ST);
 }
 
 SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() {
   // Delete hazard recognizers kept around for each MBB.
   for (auto I : SchedStates) {
     SystemZHazardRecognizer *hazrec = I.second;
     delete hazrec;
   }
 }
 
 void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
                                             MachineBasicBlock::iterator End,
                                             unsigned NumRegionInstrs) {
   // Don't emit the terminators.
   if (Begin->isTerminator())
     return;
 
   // Emit any instructions before start of region.
   advanceTo(Begin);
 }
 
 // Pick the next node to schedule.
 SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
   // Only scheduling top-down.
   IsTopNode = true;
 
   if (Available.empty())
     return nullptr;
 
   // If only one choice, return it.
   if (Available.size() == 1) {
     LLVM_DEBUG(dbgs() << "** Only one: ";
                HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
     return *Available.begin();
   }
 
   // All nodes that are possible to schedule are stored in the Available set.
   LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
 
   Candidate Best;
   for (auto *SU : Available) {
 
     // SU is the next candidate to be compared against current Best.
     Candidate c(SU, *HazardRec);
 
     // Remember which SU is the best candidate.
     if (Best.SU == nullptr || c < Best) {
       Best = c;
       LLVM_DEBUG(dbgs() << "** Best so far: ";);
     } else
       LLVM_DEBUG(dbgs() << "** Tried      : ";);
 
     LLVM_DEBUG(HazardRec->dumpSU(c.SU, dbgs()); c.dumpCosts();
                dbgs() << " Height:" << c.SU->getHeight(); dbgs() << "\n";);
 
     // Once we know we have seen all SUs that affect grouping or use unbuffered
     // resources, we can stop iterating if Best looks good.
     if (!SU->isScheduleHigh && Best.noCost())
       break;
   }
 
   assert (Best.SU != nullptr);
   return Best.SU;
 }
 
 SystemZPostRASchedStrategy::Candidate::
 Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() {
   SU = SU_;
 
   // Check the grouping cost. For a node that must begin / end a
   // group, it is positive if it would do so prematurely, or negative
   // if it would fit naturally into the schedule.
   GroupingCost = HazardRec.groupingCost(SU);
 
   // Check the resources cost for this SU.
   ResourcesCost = HazardRec.resourcesCost(SU);
 }
 
 bool SystemZPostRASchedStrategy::Candidate::
 operator<(const Candidate &other) {
 
   // Check decoder grouping.
   if (GroupingCost < other.GroupingCost)
     return true;
   if (GroupingCost > other.GroupingCost)
     return false;
 
   // Compare the use of resources.
   if (ResourcesCost < other.ResourcesCost)
     return true;
   if (ResourcesCost > other.ResourcesCost)
     return false;
 
   // Higher SU is otherwise generally better.
   if (SU->getHeight() > other.SU->getHeight())
     return true;
   if (SU->getHeight() < other.SU->getHeight())
     return false;
 
   // If all same, fall back to original order.
   if (SU->NodeNum < other.SU->NodeNum)
     return true;
 
   return false;
 }
 
 void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
   LLVM_DEBUG(dbgs() << "** Scheduling SU(" << SU->NodeNum << ") ";
              if (Available.size() == 1) dbgs() << "(only one) ";
              Candidate c(SU, *HazardRec); c.dumpCosts(); dbgs() << "\n";);
 
   // Remove SU from Available set and update HazardRec.
   Available.erase(SU);
   HazardRec->EmitInstruction(SU);
 }
 
 void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) {
   // Set isScheduleHigh flag on all SUs that we want to consider first in
   // pickNode().
   const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU);
   bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
   SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
 
   // Put all released SUs in the Available set.
   Available.insert(SU);
 }
diff --git a/llvm/test/CodeGen/SystemZ/misched-cutoff.ll b/llvm/test/CodeGen/SystemZ/misched-cutoff.ll
new file mode 100644
index 000000000000..0de80a22c301
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/misched-cutoff.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -misched-cutoff=1 -o /dev/null < %s
+;
+; Test that the post-ra scheduler does not crash with -misched-cutoff.
+
+@g_184 = external dso_local global i16, align 2
+@g_294 = external dso_local global [1 x [9 x i32*]], align 8
+
+define void @fun() {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %i = phi i64 [ 0, %bb ], [ %i22, %bb1 ]
+  %i2 = trunc i64 %i to i32
+  %i3 = lshr i32 %i2, 1
+  %i4 = select i1 false, i32 %i3, i32 undef
+  %i5 = lshr i32 %i4, 1
+  %i6 = xor i32 %i5, -306674912
+  %i7 = select i1 undef, i32 %i5, i32 %i6
+  %i8 = lshr i32 %i7, 1
+  %i9 = xor i32 %i8, -306674912
+  %i10 = select i1 undef, i32 %i8, i32 %i9
+  %i11 = lshr i32 %i10, 1
+  %i12 = xor i32 %i11, -306674912
+  %i13 = select i1 undef, i32 %i11, i32 %i12
+  %i14 = lshr i32 %i13, 1
+  %i15 = select i1 false, i32 %i14, i32 undef
+  %i16 = lshr i32 %i15, 1
+  %i17 = select i1 false, i32 %i16, i32 undef
+  %i18 = lshr i32 %i17, 1
+  %i19 = select i1 false, i32 %i18, i32 undef
+  %i20 = lshr i32 %i19, 1
+  %i21 = select i1 false, i32 %i20, i32 undef
+  store i32 %i21, i32* undef, align 4
+  %i22 = add nuw nsw i64 %i, 1
+  %i23 = icmp ult i64 %i, 255
+  br i1 %i23, label %bb1, label %bb24
+
+bb24:                                             ; preds = %bb1
+  %i25 = load volatile i16, i16* undef
+  store i32* null, i32** undef, align 8
+  store i32 -10, i32* undef, align 4
+  store i32 -10, i32* null, align 4
+  store i32 -10, i32* undef, align 4
+  store i16 0, i16* @g_184, align 2
+  store i32* null, i32** getelementptr inbounds ([1 x [9 x i32*]], [1 x [9 x i32*]]* @g_294, i64 0, i64 0, i64 2), align 8
+  store i32* null, i32** getelementptr inbounds ([1 x [9 x i32*]], [1 x [9 x i32*]]* @g_294, i64 0, i64 0, i64 5), align 8
+  unreachable
+}
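
Background on the fix: -misched-cutoff=N is a debugging option that, in effect, makes the MachineScheduler give up on a region after roughly N instructions, so SUs that were already released into the strategy's Available set may never be picked and erased. Because one SystemZPostRASchedStrategy object (and its Available set) is reused for every region, those leftover SUnit pointers would otherwise survive into the next region; clearing the set in initialize() discards them, which is what the test above exercises with -misched-cutoff=1. The standalone C++ sketch below only illustrates that reuse pattern under simplified, hypothetical types (Strategy, SUnit and scheduleRegion are invented for illustration and are not the LLVM classes):

// Minimal standalone sketch (hypothetical types; not the LLVM API): a strategy
// object reused for every region must reset its per-region state in
// initialize(), because the driver may abandon a region before draining it.
#include <cstdio>
#include <set>
#include <vector>

struct SUnit { unsigned NodeNum; };

struct Strategy {
  std::set<SUnit *> Available;   // released but not yet scheduled candidates

  void initialize() { Available.clear(); }  // analogue of the fix above
  void releaseNode(SUnit *SU) { Available.insert(SU); }
  SUnit *pickNode() { return Available.empty() ? nullptr : *Available.begin(); }
  void schedNode(SUnit *SU) { Available.erase(SU); }
};

// Schedules one region, but gives up after Cutoff picks (like -misched-cutoff).
static void scheduleRegion(Strategy &S, std::vector<SUnit> &Region,
                           unsigned Cutoff) {
  S.initialize();                // without this, leftovers from the previous
                                 // region would still sit in S.Available
  for (SUnit &SU : Region)
    S.releaseNode(&SU);
  unsigned NumScheduled = 0;
  while (SUnit *SU = S.pickNode()) {
    if (NumScheduled++ == Cutoff)
      break;                     // remaining SUs stay in S.Available
    S.schedNode(SU);
  }
}

int main() {
  Strategy S;                    // one strategy object for all regions
  std::vector<SUnit> Region1 = {{0}, {1}, {2}};
  std::vector<SUnit> Region2 = {{3}, {4}};
  scheduleRegion(S, Region1, /*Cutoff=*/1);  // leaves Region1's SUs 1 and 2 behind
  scheduleRegion(S, Region2, /*Cutoff=*/1);  // safe because initialize() clears
  std::printf("done\n");
  return 0;
}

In the sketch, the second scheduleRegion() call is well-behaved only because initialize() clears Available; dropping that line leaves Region1's unscheduled nodes in the set, the analogue of the stale state that made the post-RA scheduler crash under -misched-cutoff.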