Index: include/llvm/CodeGen/MachineInstr.h =================================================================== --- include/llvm/CodeGen/MachineInstr.h +++ include/llvm/CodeGen/MachineInstr.h @@ -1050,7 +1050,7 @@ /// Mark all subregister defs of register @p Reg with the undef flag. /// This function is used when we determined to have a subregister def in an /// otherwise undefined super register. - void addRegisterDefReadUndef(unsigned Reg); + void setRegisterDefReadUndef(unsigned Reg, bool IsUndef = true); /// addRegisterDefined - We have determined MI defines a register. Make sure /// there is an operand defining Reg. Index: include/llvm/CodeGen/MachineScheduler.h =================================================================== --- include/llvm/CodeGen/MachineScheduler.h +++ include/llvm/CodeGen/MachineScheduler.h @@ -150,14 +150,17 @@ struct MachineSchedPolicy { // Allow the scheduler to disable register pressure tracking. bool ShouldTrackPressure; + // Allow the schedule to track subregister defs to different lanes + // independently. + bool ShouldTrackLaneMasks; // Allow the scheduler to force top-down or bottom-up scheduling. If neither // is true, the scheduler runs in both directions and converges. bool OnlyTopDown; bool OnlyBottomUp; - MachineSchedPolicy(): ShouldTrackPressure(false), OnlyTopDown(false), - OnlyBottomUp(false) {} + MachineSchedPolicy(): ShouldTrackPressure(false), ShouldTrackLaneMasks(false), + OnlyTopDown(false), OnlyBottomUp(false) {} }; /// MachineSchedStrategy - Interface to the scheduling algorithm used by @@ -179,6 +182,10 @@ /// initializing this strategy. Called after initPolicy. virtual bool shouldTrackPressure() const { return true; } + /// Check if lane masks should be tracked to schedule subregister defs of the + /// same vreg independently. Called after initPolicy(). + virtual bool shouldTrackLaneMasks() const { return false; } + /// Initialize the strategy after building the DAG for a new region. virtual void initialize(ScheduleDAGMI *DAG) = 0; @@ -458,6 +465,8 @@ void updatePressureDiffs(ArrayRef LiveUses); + void updateFirstDef(const VRegInfo &Info, const MachineInstr *ToBeScheduled); + void updateScheduledPressure(const SUnit *SU, const std::vector &NewMaxPressure); }; @@ -866,6 +875,10 @@ return RegionPolicy.ShouldTrackPressure; } + bool shouldTrackLaneMasks() const override { + return RegionPolicy.ShouldTrackLaneMasks; + } + void initialize(ScheduleDAGMI *dag) override; SUnit *pickNode(bool &IsTopNode) override; Index: include/llvm/CodeGen/RegisterPressure.h =================================================================== --- include/llvm/CodeGen/RegisterPressure.h +++ include/llvm/CodeGen/RegisterPressure.h @@ -304,10 +304,11 @@ /// Recede across the previous instruction. bool recede(SmallVectorImpl *LiveUses = nullptr, + SmallVectorImpl *Defs = nullptr, PressureDiff *PDiff = nullptr); /// Advance across the current instruction. - bool advance(); + bool advance(SmallVectorImpl *Defs = nullptr); /// Finalize the region boundaries and recored live ins and live outs. void closeRegion(); Index: include/llvm/CodeGen/ScheduleDAGInstrs.h =================================================================== --- include/llvm/CodeGen/ScheduleDAGInstrs.h +++ include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -43,6 +43,19 @@ } }; + struct VRegInfo { + unsigned VirtReg; + unsigned NumUnscheduledDefSUs : 31; + bool LiveIn : 1; + + VRegInfo(unsigned VReg) + : VirtReg(VReg), NumUnscheduledDefSUs(0), LiveIn(false) {} + + unsigned getSparseSetIndex() const { + return TargetRegisterInfo::virtReg2Index(VirtReg); + } + }; + /// Record a physical register access. /// For non-data-dependent uses, OpIdx == -1. struct PhysRegSUOper { @@ -72,6 +85,8 @@ /// vreg use list. typedef SparseMultiSet VReg2SUnitMultiMap; + typedef SparseSet VRegInfoMap; + /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of /// MachineInstrs. class ScheduleDAGInstrs : public ScheduleDAG { @@ -122,6 +137,12 @@ /// is mapped to a set of SUnits. These include all local vreg uses, not /// just the uses for a singly defined vreg. VReg2SUnitMultiMap VRegUses; + /// After calling BuildSchedGraph, each vreg def in the scheduling region + /// is mapped to a set of SUnits. + VReg2SUnitMultiMap VRegDefs; + + /// Information (mostly the number of unscheduled defs left) about a vreg. + VRegInfoMap VRegInfos; /// State internal to DAG building. /// ------------------------------- Index: lib/CodeGen/LiveIntervalAnalysis.cpp =================================================================== --- lib/CodeGen/LiveIntervalAnalysis.cpp +++ lib/CodeGen/LiveIntervalAnalysis.cpp @@ -469,7 +469,7 @@ if (MRI->shouldTrackSubRegLiveness(LI.reg)) { if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) { MachineInstr *MI = getInstructionFromIndex(Def); - MI->addRegisterDefReadUndef(LI.reg); + MI->setRegisterDefReadUndef(LI.reg); } } Index: lib/CodeGen/MachineInstr.cpp =================================================================== --- lib/CodeGen/MachineInstr.cpp +++ lib/CodeGen/MachineInstr.cpp @@ -1863,11 +1863,11 @@ } } -void MachineInstr::addRegisterDefReadUndef(unsigned Reg) { +void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) { for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) continue; - MO.setIsUndef(); + MO.setIsUndef(IsUndef); } } Index: lib/CodeGen/MachineScheduler.cpp =================================================================== --- lib/CodeGen/MachineScheduler.cpp +++ lib/CodeGen/MachineScheduler.cpp @@ -859,6 +859,7 @@ SUPressureDiffs.clear(); ShouldTrackPressure = SchedImpl->shouldTrackPressure(); + TrackLaneMasks = SchedImpl->shouldTrackLaneMasks(); } // Setup the register pressure trackers for the top scheduled top and bottom @@ -921,6 +922,12 @@ dbgs() << TRI->getRegPressureSetName( RegionCriticalPSets[i].getPSet()) << " "; dbgs() << "\n"); + + if (TrackLaneMasks) { + // Set read-undef flag for VRegs with exactly one (subregister) def. + for (const VRegInfo &Info : VRegInfos) + updateFirstDef(Info, nullptr); + } } void ScheduleDAGMILive:: @@ -992,6 +999,48 @@ } } +/// Register pressure is tracked for full vregs. This means a subregister def +/// does not change the register pressure as it appears to be reading+writing +/// the vreg, except for the first which just writes the vreg. This means we +/// have to change the pressure diffs as soon as we only have 1 def left to +/// schedule and know it will become the first. +/// If @p ToBeScheduled is given this node is considered as already scheduled. +void ScheduleDAGMILive::updateFirstDef(const VRegInfo &Info, + const MachineInstr *ToBeScheduled) { + if (Info.LiveIn || Info.NumUnscheduledDefSUs != 1) + return; + + unsigned VReg = Info.VirtReg; +#ifndef NDEBUG + unsigned Unscheduled = 0; + for (const VReg2SUnit &V2SU + : make_range(VRegDefs.find(VReg), VRegDefs.end())) { + if (!V2SU.SU->isScheduled && V2SU.SU->getInstr() != ToBeScheduled) + ++Unscheduled; + } + assert(Unscheduled == 1 && + "There should be exactly one other unscheduled instruction left"); +#endif + // There should be exactly one other unscheduled MI left which needs its + // pressure changed. + for (const VReg2SUnit &V2SU + : make_range(VRegDefs.find(VReg), VRegDefs.end())) { + SUnit *SU = V2SU.SU; + if (SU->isScheduled) + continue; + MachineInstr &MI = *SU->getInstr(); + if (&MI == ToBeScheduled) + continue; + // The remaining def will be the first in line. This means it does not read + // earlier values: Change the pressure diff and add the read-undef flag. + getPressureDiff(SU).addPressureChange(VReg, false, &MRI); + MI.setRegisterDefReadUndef(VReg, true); + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") will be first def of " + << PrintVRegOrUnit(VReg, TRI) << ": " << MI); + break; + } +} + /// schedule - Called back from MachineScheduler::runOnMachineFunction /// after setting up the current scheduling region. [RegionBegin, RegionEnd) /// only includes instructions that have DAG nodes, not scheduling boundaries. @@ -1200,9 +1249,25 @@ if (ShouldTrackPressure) { // Update top scheduled pressure. - TopRPTracker.advance(); + SmallVector Defs; + TopRPTracker.advance(&Defs); assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); + + if (TrackLaneMasks) { + // If we schedule the first of a series of subregister defs, then we + // have to mark it with the read-undef flag. + for (unsigned Reg : Defs) { + if (!TRI->isVirtualRegister(Reg)) + continue; + + VRegInfo &Info = *VRegInfos.find(Reg); + if (Info.LiveIn) + continue; + MI->setRegisterDefReadUndef(Reg, true); + Info.LiveIn = true; + } + } } } else { @@ -1222,10 +1287,23 @@ if (ShouldTrackPressure) { // Update bottom scheduled pressure. SmallVector LiveUses; - BotRPTracker.recede(&LiveUses); + SmallVector Defs; + BotRPTracker.recede(&LiveUses, &Defs); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); updatePressureDiffs(LiveUses); + + if (TrackLaneMasks) { + for (unsigned Reg : Defs) { + if (!TRI->isVirtualRegister(Reg)) + continue; + + VRegInfo &Info = *VRegInfos.find(Reg); + assert(Info.NumUnscheduledDefSUs > 0); + --Info.NumUnscheduledDefSUs; + updateFirstDef(Info, MI); + } + } } } } Index: lib/CodeGen/RegisterPressure.cpp =================================================================== --- lib/CodeGen/RegisterPressure.cpp +++ lib/CodeGen/RegisterPressure.cpp @@ -456,6 +456,7 @@ /// difference pointer is provided record the changes is pressure caused by this /// instruction independent of liveness. bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, + SmallVectorImpl *Defs, PressureDiff *PDiff) { // Check for the top of the analyzable region. if (CurrPos == MBB->begin()) { @@ -500,6 +501,9 @@ // TODO: consider earlyclobbers? for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { unsigned Reg = RegOpers.Defs[i]; + if (Defs != nullptr && !containsReg(*Defs, Reg)) + Defs->push_back(Reg); + bool DeadDef = false; if (RequireIntervals) { const LiveRange *LR = getLiveRange(Reg); @@ -552,7 +556,7 @@ } /// Advance across the current instruction. -bool RegPressureTracker::advance() { +bool RegPressureTracker::advance(SmallVectorImpl *Defs) { assert(!TrackUntiedDefs && "unsupported mode"); // Check for the bottom of the analyzable region. @@ -605,6 +609,8 @@ // Generate liveness for defs. for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { unsigned Reg = RegOpers.Defs[i]; + if (Defs != nullptr && !containsReg(*Defs, Reg)) + Defs->push_back(Reg); if (LiveRegs.insert(Reg)) increaseRegPressure(Reg); } Index: lib/CodeGen/ScheduleDAGInstrs.cpp =================================================================== --- lib/CodeGen/ScheduleDAGInstrs.cpp +++ lib/CodeGen/ScheduleDAGInstrs.cpp @@ -388,18 +388,42 @@ /// TODO: Hoist loop induction variable increments. This has to be /// reevaluated. Generally, IV scheduling should be done before coalescing. void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { - const MachineInstr *MI = SU->getInstr(); - const MachineOperand &MO = MI->getOperand(OperIdx); + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); unsigned Reg = MO.getReg(); unsigned DefLaneMask = getLaneMaskForMO(MO); + // Remember the isUndef flag, as we may reset it in a bit. + bool IsKill = MO.getSubReg() == 0 || MO.isUndef(); + + VRegInfo *RegInfo = nullptr; + if (TrackLaneMasks) { + auto IteratorBoolPair = VRegInfos.insert(VRegInfo(Reg)); + RegInfo = &*(IteratorBoolPair.first); + RegInfo->LiveIn = !IsKill; + // Clear undef flag, we'll re-add it later once we know which Def is first. + MO.setIsUndef(false); + } + // Record this local VReg Def. if (MO.isDead()) { assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && "Dead defs should have no uses"); } else { + if (TrackLaneMasks) { + VReg2SUnitMultiMap::iterator UI = VRegDefs.find(Reg); + for (; UI != VRegDefs.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegDefs.end()) { + VRegDefs.insert(VReg2SUnit(Reg, 0, SU)); + ++RegInfo->NumUnscheduledDefSUs; + } + } + // If we have a flag, none of the lane values comes from an // earlier instruction. - unsigned KillLaneMask = MO.isUndef() ? ~0u : DefLaneMask; + unsigned KillLaneMask = IsKill ? ~0u : DefLaneMask; // Add data dependence to all uses we found so far. const TargetSubtargetInfo &ST = MF.getSubtarget(); for (VReg2SUnitMultiMap::iterator I = CurrentVRegUses.find(Reg), @@ -853,6 +877,12 @@ VRegUses.clear(); VRegUses.setUniverse(NumVirtRegs); + VRegDefs.clear(); + VRegInfos.clear(); + if (TrackLaneMasks) { + VRegDefs.setUniverse(NumVirtRegs); + VRegInfos.setUniverse(NumVirtRegs); + } // Model data dependencies between instructions being scheduled and the // ExitSU. @@ -875,13 +905,6 @@ SUnit *SU = MISUnitMap[MI]; assert(SU && "No SUnit mapped to this MI"); - if (RPTracker) { - PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr; - RPTracker->recede(/*LiveUses=*/nullptr, PDiff); - assert(RPTracker->getPos() == std::prev(MII) && - "RPTracker can't find MI"); - } - assert( (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) && "Cannot schedule terminators or labels!"); @@ -1124,6 +1147,13 @@ BarrierChain->addPred(SDep(SU, SDep::Barrier)); } } + + if (RPTracker) { + PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr; + RPTracker->recede(/*LiveUses=*/nullptr, /*Defs=*/nullptr, PDiff); + assert(RPTracker->getPos() == std::prev(MII) && + "RPTracker can't find MI"); + } } if (DbgMI) FirstDbgValue = DbgMI;