Index: include/llvm/CodeGen/MachineScheduler.h =================================================================== --- include/llvm/CodeGen/MachineScheduler.h +++ include/llvm/CodeGen/MachineScheduler.h @@ -150,6 +150,9 @@ struct MachineSchedPolicy { // Allow the scheduler to disable register pressure tracking. bool ShouldTrackPressure; + /// Track LaneMasks to allow reordering of independent subregister writes + /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks() + bool ShouldTrackLaneMasks; // Allow the scheduler to force top-down or bottom-up scheduling. If neither // is true, the scheduler runs in both directions and converges. @@ -160,8 +163,8 @@ // first. bool DisableLatencyHeuristic; - MachineSchedPolicy(): ShouldTrackPressure(false), OnlyTopDown(false), - OnlyBottomUp(false), DisableLatencyHeuristic(false) {} + MachineSchedPolicy(): ShouldTrackPressure(false), ShouldTrackLaneMasks(false), + OnlyTopDown(false), OnlyBottomUp(false), DisableLatencyHeuristic(false) {} }; /// MachineSchedStrategy - Interface to the scheduling algorithm used by @@ -185,6 +188,11 @@ /// initializing this strategy. Called after initPolicy. virtual bool shouldTrackPressure() const { return true; } + /// Returns true if lanemasks should be tracked. LaneMask tracking is + /// necessary to reorder independent subregister defs for the same vreg. + /// This has to be enabled in combination with shouldTrackPressure(). + virtual bool shouldTrackLaneMasks() const { return false; } + /// Initialize the strategy after building the DAG for a new region. virtual void initialize(ScheduleDAGMI *DAG) = 0; @@ -371,6 +379,7 @@ /// Register pressure in this region computed by initRegPressure. 
bool ShouldTrackPressure; + bool ShouldTrackLaneMasks; IntervalPressure RegPressure; RegPressureTracker RPTracker; @@ -392,8 +401,9 @@ std::unique_ptr S) : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false), RegClassInfo(C->RegClassInfo), DFSResult(nullptr), - ShouldTrackPressure(false), RPTracker(RegPressure), - TopRPTracker(TopPressure), BotRPTracker(BotPressure) {} + ShouldTrackPressure(false), ShouldTrackLaneMasks(false), + RPTracker(RegPressure), TopRPTracker(TopPressure), + BotRPTracker(BotPressure) {} ~ScheduleDAGMILive() override; @@ -874,6 +884,10 @@ return RegionPolicy.ShouldTrackPressure; } + bool shouldTrackLaneMasks() const override { + return RegionPolicy.ShouldTrackLaneMasks; + } + void initialize(ScheduleDAGMI *dag) override; SUnit *pickNode(bool &IsTopNode) override; Index: include/llvm/CodeGen/RegisterPressure.h =================================================================== --- include/llvm/CodeGen/RegisterPressure.h +++ include/llvm/CodeGen/RegisterPressure.h @@ -373,10 +373,10 @@ /// Recede across the previous instruction. bool recede(SmallVectorImpl *LiveUses = nullptr, - PressureDiff *PDiff = nullptr); + PressureDiff *PDiff = nullptr, bool AddReadUndef = false); /// Advance across the current instruction. - bool advance(); + bool advance(bool AddReadUndef = false); /// Finalize the region boundaries and recored live ins and live outs. 
void closeRegion(); @@ -490,7 +490,8 @@ void decreaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask, LaneBitmask NewMask); - void bumpDeadDefs(ArrayRef DeadDefs); + void bumpDeadDefs(ArrayRef DeadDefs, + MachineInstr *AddReadUndefTo); void bumpUpwardPressure(const MachineInstr *MI); void bumpDownwardPressure(const MachineInstr *MI); Index: lib/CodeGen/MachineScheduler.cpp =================================================================== --- lib/CodeGen/MachineScheduler.cpp +++ lib/CodeGen/MachineScheduler.cpp @@ -331,6 +331,13 @@ LIS = &getAnalysis(); + if (mf.getRegInfo().subRegLivenessEnabled()) { + // Dead subregister defs have no users and therefore no dependencies; + // moving them around may cause liveintervals to degrade into multiple + // components. Change the dead subregister defs to have their own vreg. + LIS->renameDeadSubRegDefs(); + } + if (VerifyScheduling) { DEBUG(LIS->dump()); MF->verify(this, "Before machine scheduling."); @@ -869,13 +876,18 @@ SUPressureDiffs.clear(); ShouldTrackPressure = SchedImpl->shouldTrackPressure(); + ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks(); + if (ShouldTrackLaneMasks && !ShouldTrackPressure) + report_fatal_error("ShouldTrackLaneMasks requires ShouldTrackPressure"); } // Setup the register pressure trackers for the top scheduled top and bottom // scheduled regions. void ScheduleDAGMILive::initRegPressure() { - TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false); - BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, false, false); + TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, + ShouldTrackLaneMasks, false); + BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, + ShouldTrackLaneMasks, false); // Close the RPTracker to finalize live ins. RPTracker.closeRegion(); @@ -972,46 +984,71 @@ void ScheduleDAGMILive::updatePressureDiffs( ArrayRef LiveUses) { for (const RegisterMaskPair &P : LiveUses) { - /// FIXME: Currently assuming single-use physregs. 
unsigned Reg = P.RegUnit; - assert(P.LaneMask != 0); - DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); + /// FIXME: Currently assuming single-use physregs. if (!TRI->isVirtualRegister(Reg)) continue; - // This may be called before CurrentBottom has been initialized. However, - // BotRPTracker must have a valid position. We want the value live into the - // instruction or live out of the block, so ask for the previous - // instruction's live-out. - const LiveInterval &LI = LIS->getInterval(Reg); - VNInfo *VNI; - MachineBasicBlock::const_iterator I = - nextIfDebug(BotRPTracker.getPos(), BB->end()); - if (I == BB->end()) - VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); - else { - LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I)); - VNI = LRQ.valueIn(); - } - // RegisterPressureTracker guarantees that readsReg is true for LiveUses. - assert(VNI && "No live value at use."); - for (const VReg2SUnit &V2SU - : make_range(VRegUses.find(Reg), VRegUses.end())) { - SUnit *SU = V2SU.SU; - // If this use comes before the reaching def, it cannot be a last use, so - // descrease its pressure change. - if (!SU->isScheduled && SU != &ExitSU) { - LiveQueryResult LRQ - = LI.Query(LIS->getInstructionIndex(SU->getInstr())); - if (LRQ.valueIn() == VNI) { - PressureDiff &PDiff = getPressureDiff(SU); - PDiff.addPressureChange(Reg, true, &MRI); - DEBUG( - dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " - << *SU->getInstr(); - dbgs() << " to "; - PDiff.dump(*TRI); - ); + if (ShouldTrackLaneMasks) { + // If the register has just become live then other uses won't change + // this fact anymore => decrement pressure. + // If the register has just become dead then other uses make it come + // back to life => increment pressure. 
+ bool Decrement = P.LaneMask != 0; + + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit &SU = *V2SU.SU; + if (SU.isScheduled || &SU == &ExitSU) + continue; + + PressureDiff &PDiff = getPressureDiff(&SU); + PDiff.addPressureChange(Reg, Decrement, &MRI); + DEBUG( + dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") " + << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask) + << ' ' << *SU.getInstr(); + dbgs() << " to "; + PDiff.dump(*TRI); + ); + } + } else { + assert(P.LaneMask != 0); + DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); + // This may be called before CurrentBottom has been initialized. However, + // BotRPTracker must have a valid position. We want the value live into the + // instruction or live out of the block, so ask for the previous + // instruction's live-out. + const LiveInterval &LI = LIS->getInterval(Reg); + VNInfo *VNI; + MachineBasicBlock::const_iterator I = + nextIfDebug(BotRPTracker.getPos(), BB->end()); + if (I == BB->end()) + VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); + else { + LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I)); + VNI = LRQ.valueIn(); + } + // RegisterPressureTracker guarantees that readsReg is true for LiveUses. + assert(VNI && "No live value at use."); + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; + // If this use comes before the reaching def, it cannot be a last use, + // so decrease its pressure change. + if (!SU->isScheduled && SU != &ExitSU) { + LiveQueryResult LRQ + = LI.Query(LIS->getInstructionIndex(SU->getInstr())); + if (LRQ.valueIn() == VNI) { + PressureDiff &PDiff = getPressureDiff(SU); + PDiff.addPressureChange(Reg, true, &MRI); + DEBUG( + dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr(); + dbgs() << " to "; + PDiff.dump(*TRI); + ); + } } } } @@ -1113,14 +1150,14 @@ // Initialize the register pressure tracker used by buildSchedGraph. 
RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, - false, /*TrackUntiedDefs=*/true); + ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true); // Account for liveness generate by the region boundary. if (LiveRegionEnd != RegionEnd) RPTracker.recede(); // Build the DAG, and compute current register pressure. - buildSchedGraph(AA, &RPTracker, &SUPressureDiffs); + buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, ShouldTrackLaneMasks); // Initialize top/bottom trackers after computing region pressure. initRegPressure(); @@ -1239,7 +1276,8 @@ if (ShouldTrackPressure) { // Update top scheduled pressure. - TopRPTracker.advance(); + bool AddReadUndef = ShouldTrackLaneMasks; + TopRPTracker.advance(AddReadUndef); assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); DEBUG( dbgs() << "Top Pressure:\n"; @@ -1266,7 +1304,8 @@ if (ShouldTrackPressure) { // Update bottom scheduled pressure. SmallVector LiveUses; - BotRPTracker.recede(&LiveUses); + bool AddReadUndef = ShouldTrackLaneMasks; + BotRPTracker.recede(&LiveUses, nullptr, AddReadUndef); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); DEBUG( dbgs() << "Bottom Pressure:\n"; Index: lib/CodeGen/RegisterPressure.cpp =================================================================== --- lib/CodeGen/RegisterPressure.cpp +++ lib/CodeGen/RegisterPressure.cpp @@ -350,6 +350,19 @@ } } +static void setRegZero(SmallVectorImpl &RegUnits, + unsigned RegUnit) { + auto I = std::find_if(RegUnits.begin(), RegUnits.end(), + [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); + if (I == RegUnits.end()) { + RegUnits.push_back(RegisterMaskPair(RegUnit, 0)); + } else { + I->LaneMask = 0; + } +} + static void removeRegLanes(SmallVectorImpl &RegUnits, RegisterMaskPair Pair) { unsigned RegUnit = Pair.RegUnit; @@ -480,14 +493,21 @@ static void adjustOperandsForLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, RegisterOperands &RegOpers, - SlotIndex Pos) { + SlotIndex Pos, 
MachineInstr *AddFlagsMI) { for (auto I = RegOpers.Defs.begin(); I != RegOpers.Defs.end(); ) { LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, I->RegUnit, Pos.getDeadSlot()); + // If the def is all that is live after the instruction, then in case + // of a subregister def we need a read-undef flag. + if (AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask) == 0) + AddFlagsMI->setRegisterDefReadUndef(I->RegUnit); unsigned LaneMask = I->LaneMask & LiveAfter; - if (LaneMask == 0) + if (LaneMask == 0) { + // Mark the operand Dead before erase() makes I refer to the next def. + if (AddFlagsMI != nullptr) + AddFlagsMI->addRegisterDead(I->RegUnit, MRI.getTargetRegisterInfo()); I = RegOpers.Defs.erase(I); - else { + } else { I->LaneMask = LaneMask; ++I; } @@ -555,7 +575,8 @@ /// Record the pressure difference induced by the given operand list. static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers, - const MachineRegisterInfo *MRI) { + const MachineRegisterInfo *MRI, + bool TrackLaneMasks) { assert(!PDiff.begin()->isValid() && "stale PDiff"); for (const RegisterMaskPair &P : RegOpers.Defs) @@ -606,12 +627,15 @@ discoverLiveInOrOut(Pair, P.LiveOutRegs); } -void RegPressureTracker::bumpDeadDefs(ArrayRef DeadDefs) { +void RegPressureTracker::bumpDeadDefs(ArrayRef DeadDefs, + MachineInstr *AddReadUndefTo) { for (const RegisterMaskPair &P : DeadDefs) { unsigned Reg = P.RegUnit; LaneBitmask LiveMask = LiveRegs.contains(Reg); LaneBitmask BumpedMask = LiveMask | P.LaneMask; increaseRegPressure(Reg, LiveMask, BumpedMask); + if (AddReadUndefTo != nullptr && LiveMask == 0) + AddReadUndefTo->setRegisterDefReadUndef(Reg, true); } for (const RegisterMaskPair &P : DeadDefs) { unsigned Reg = P.RegUnit; @@ -627,7 +651,7 @@ /// difference pointer is provided record the changes is pressure caused by this /// instruction independent of liveness. 
bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, - PressureDiff *PDiff) { + PressureDiff *PDiff, bool AddReadUndef) { // Check for the top of the analyzable region. if (CurrPos == MBB->begin()) { closeRegion(); @@ -657,16 +681,20 @@ if (RequireIntervals && isTopClosed()) static_cast(P).openTop(SlotIdx); + const MachineInstr &MI = *CurrPos; RegisterOperands RegOpers(TRI, MRI, TrackLaneMasks, false); - collectOperands(CurrPos, RegOpers); + collectOperands(&MI, RegOpers); if (TrackLaneMasks) - adjustOperandsForLiveness(*LIS, *MRI, RegOpers, SlotIdx); + adjustOperandsForLiveness(*LIS, *MRI, RegOpers, SlotIdx, + AddReadUndef ? const_cast(&MI) + : nullptr); if (PDiff) - collectPDiff(*PDiff, RegOpers, MRI); + collectPDiff(*PDiff, RegOpers, MRI, TrackLaneMasks); // Boost pressure for all dead defs together. - bumpDeadDefs(RegOpers.DeadDefs); + bumpDeadDefs(RegOpers.DeadDefs, + AddReadUndef ? const_cast(&MI) : nullptr); // Kill liveness at live defs. // TODO: consider earlyclobbers? @@ -700,6 +728,15 @@ } } + if (NewMask == 0) { + if (AddReadUndef && PreviousMask != ~0u) + (const_cast(MI)).setRegisterDefReadUndef(Def.RegUnit, true); + // Add a 0 entry to LiveUses as a marker that the complete vreg has become + // dead. + if (TrackLaneMasks && LiveUses != nullptr) + setRegZero(*LiveUses, Reg); + } + decreaseRegPressure(Reg, PreviousMask, NewMask); } @@ -715,8 +752,22 @@ // Did the register just become live? if (PreviousMask == 0) { if (LiveUses != nullptr) { - unsigned NewLanes = NewMask & ~PreviousMask; - addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewLanes)); + if (!TrackLaneMasks) { + addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); + } else { + auto I = std::find_if(LiveUses->begin(), LiveUses->end(), + [Reg](const RegisterMaskPair Other) { + return Other.RegUnit == Reg; + }); + bool IsRedef = I != LiveUses->end(); + if (IsRedef) { + // ignore re-defs here... 
+ assert(I->LaneMask == 0); + removeRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); + } else { + addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); + } + } } // Discover live outs if this may be the first occurance of this register. @@ -738,7 +789,7 @@ } /// Advance across the current instruction. -bool RegPressureTracker::advance() { +bool RegPressureTracker::advance(bool AddReadUndef) { assert(!TrackUntiedDefs && "unsupported mode"); // Check for the bottom of the analyzable region. @@ -761,10 +812,13 @@ static_cast(P).openBottom(CurrPos); } + const MachineInstr &MI = *CurrPos; RegisterOperands RegOpers(TRI, MRI, TrackLaneMasks, false); - collectOperands(CurrPos, RegOpers); + collectOperands(&MI, RegOpers); if (TrackLaneMasks) - adjustOperandsForLiveness(*LIS, *MRI, RegOpers, SlotIdx); + adjustOperandsForLiveness(*LIS, *MRI, RegOpers, SlotIdx, + AddReadUndef ? const_cast(&MI) + : nullptr); for (const RegisterMaskPair &Use : RegOpers.Uses) { unsigned Reg = Use.RegUnit; @@ -788,10 +842,14 @@ LaneBitmask PreviousMask = LiveRegs.insert(Def); LaneBitmask NewMask = PreviousMask | Def.LaneMask; increaseRegPressure(Def.RegUnit, PreviousMask, NewMask); + if (AddReadUndef && PreviousMask == 0 && NewMask != ~0u) { + (const_cast(MI)).setRegisterDefReadUndef(Def.RegUnit, true); + } } // Boost pressure for all dead defs together. - bumpDeadDefs(RegOpers.DeadDefs); + bumpDeadDefs(RegOpers.DeadDefs, + AddReadUndef ? const_cast(&MI) : nullptr); // Find the next instruction. do @@ -896,11 +954,11 @@ RegisterOperands RegOpers(TRI, MRI, TrackLaneMasks, /*IgnoreDead=*/true); collectOperands(MI, RegOpers); if (TrackLaneMasks) - adjustOperandsForLiveness(*LIS, *MRI, RegOpers, SlotIdx); + adjustOperandsForLiveness(*LIS, *MRI, RegOpers, SlotIdx, nullptr); // Boost max pressure for all dead defs together. // Since CurrSetPressure and MaxSetPressure - bumpDeadDefs(RegOpers.DeadDefs); + bumpDeadDefs(RegOpers.DeadDefs, nullptr); // Kill liveness at live defs. 
for (const RegisterMaskPair &P : RegOpers.Defs) { @@ -1137,7 +1195,7 @@ RegisterOperands RegOpers(TRI, MRI, TrackLaneMasks, false); collectOperands(MI, RegOpers); if (TrackLaneMasks) - adjustOperandsForLiveness(*LIS, *MRI, RegOpers, MISlotIdx); + adjustOperandsForLiveness(*LIS, *MRI, RegOpers, MISlotIdx, nullptr); SlotIndex SlotIdx; if (RequireIntervals) @@ -1174,7 +1232,7 @@ } // Boost pressure for all dead defs together. - bumpDeadDefs(RegOpers.DeadDefs); + bumpDeadDefs(RegOpers.DeadDefs, nullptr); } /// Consider the pressure increase caused by traversing this instruction Index: lib/CodeGen/ScheduleDAGInstrs.cpp =================================================================== --- lib/CodeGen/ScheduleDAGInstrs.cpp +++ lib/CodeGen/ScheduleDAGInstrs.cpp @@ -806,6 +806,19 @@ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + // Ignore re-defs. + if (TrackLaneMasks) { + bool FoundDef = false; + for (const MachineOperand &MO2 : MI->operands()) { + if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) { + FoundDef = true; + break; + } + } + if (FoundDef) + continue; + } + // Record this local VReg use. VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); for (; UI != VRegUses.end(); ++UI) {