Index: include/llvm/CodeGen/ScheduleDAGInstrs.h
===================================================================
--- include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -311,11 +311,18 @@
   void fixupKills(MachineBasicBlock *MBB);
 
 protected:
   void initSUnits();
+  void clearCallsInDefsForReg(unsigned Reg);
   void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
+  void addCallDeps(SUnit *SU);
+  void addRegMaskDeps(SUnit *SU, unsigned OperIdx);
   void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
   void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
   void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
 
+  /// Check if the MI of SU has a dead def of Reg, or if MI clobbers
+  /// it according to a regmask.
+  bool SURegDefIsDead(const SUnit *SU, unsigned Reg);
+
   /// \brief PostRA helper for rewriting kill flags.
   void startBlockForKills(MachineBasicBlock *BB);
Index: lib/CodeGen/MachineScheduler.cpp
===================================================================
--- lib/CodeGen/MachineScheduler.cpp
+++ lib/CodeGen/MachineScheduler.cpp
@@ -81,6 +81,12 @@
 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
     cl::desc("Verify machine instrs before and after machine scheduling"));
 
+static cl::opt<bool> RescheduleCalls(
+    "resched-calls",
+    cl::desc("Don't treat calls as scheduling boundaries in the machine "
+             "instruction scheduling pass."), cl::init(true),
+    cl::Hidden);
+
 // DAG subtrees must have at least this many nodes.
 static const unsigned MinSubtreeSize = 8;
 
@@ -388,19 +394,15 @@
 
 /// Return true of the given instruction should not be included in a scheduling
 /// region.
-///
-/// MachineScheduler does not currently support scheduling across calls. To
-/// handle calls, the DAG builder needs to be modified to create register
-/// anti/output dependencies on the registers clobbered by the call's regmask
-/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
-/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
-/// the boundary, but there would be no benefit to postRA scheduling across
-/// calls this late anyway.
 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                             MachineBasicBlock *MBB,
                             MachineFunction *MF,
                             const TargetInstrInfo *TII) {
-  return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
+  // Rescheduling across calls may be controlled by a command-line option.
+  if (MI->isCall() && !MI->isTerminator() && RescheduleCalls.getNumOccurrences())
+    return !RescheduleCalls;
+
+  return TII->isSchedulingBoundary(MI, MBB, *MF);
 }
 
 /// Main driver for both MachineScheduler and PostMachineScheduler.
Index: lib/CodeGen/ScheduleDAGInstrs.cpp
===================================================================
--- lib/CodeGen/ScheduleDAGInstrs.cpp
+++ lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -312,6 +312,84 @@
   }
 }
 
+void ScheduleDAGInstrs::clearCallsInDefsForReg(unsigned Reg) {
+  // Calls will not be reordered because of chain dependencies (see
+  // below). Since call operands are dead, calls may otherwise keep being
+  // added to the DefList, making dependence checking quadratic in the
+  // size of the block. Instead, we leave only one call at the back of
+  // the DefList; the caller inserts the current call right after this.
+  Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
+  Reg2SUnitsMap::iterator B = P.first;
+  Reg2SUnitsMap::iterator I = P.second;
+  for (bool isBegin = I == B; !isBegin; /* empty */) {
+    isBegin = (--I) == B;
+    if (I->SU->isCall)
+      I = Defs.erase(I);
+  }
+}
+
+void ScheduleDAGInstrs::addCallDeps(SUnit *SU) {
+  assert(SU->isCall);
+
+  // Make sure we don't move reserved register definitions across
+  // calls. (This would be unnecessary if they were guaranteed to
+  // always be part of a regmask operand on each call. Are they?)
+  for (unsigned reg = 1; reg < TRI->getNumRegs(); reg++) {
+    if (MRI.isReserved(reg)) {
+      // Add output dependencies on all reserved registers.
+      for (MCRegAliasIterator Alias(reg, TRI, true); Alias.isValid(); ++Alias) {
+        for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
+          SUnit *DefSU = I->SU;
+          if (DefSU == &ExitSU)
+            continue;
+
+          bool defOp = DefSU->getInstr()->definesRegister(*Alias);
+          if (DefSU != SU && defOp) {
+            SDep Dep(SU, SDep::Output, /*Reg=*/*Alias);
+            DefSU->addPred(Dep);
+          }
+        }
+      }
+
+      clearCallsInDefsForReg(reg);
+      Defs.insert(PhysRegSUOper(SU, -1, reg));
+    }
+  }
+}
+
+/// addRegMaskDeps - Add output dependencies for the registers clobbered by a
+/// regmask operand, so that instructions can be rescheduled around calls.
+void ScheduleDAGInstrs::addRegMaskDeps(SUnit *SU, unsigned OperIdx) {
+  MachineInstr *MI = SU->getInstr();
+  MachineOperand &MO = MI->getOperand(OperIdx);
+
+  for (unsigned reg = 1; reg < TRI->getNumRegs(); reg++) {
+    if (MO.clobbersPhysReg(reg)) {
+      // Add output dependencies on all clobbered registers. Calls are
+      // expected to have explicit register operands for in/out arguments,
+      // so those are not handled here.
+      for (MCRegAliasIterator Alias(reg, TRI, true); Alias.isValid(); ++Alias) {
+        for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
+          SUnit *DefSU = I->SU;
+          if (DefSU == &ExitSU)
+            continue;
+
+          // Don't add a dependency on another dead def or another regmask.
+          bool defOp = DefSU->getInstr()->definesRegister(*Alias);
+          if (DefSU != SU && defOp && !SURegDefIsDead(DefSU, *Alias)) {
+            SDep Dep(SU, SDep::Output, /*Reg=*/*Alias);
+            DefSU->addPred(Dep);
+          }
+        }
+      }
+
+      if (SU->isCall)
+        clearCallsInDefsForReg(reg);
+      Defs.insert(PhysRegSUOper(SU, -1, reg));
+    }
+  }
+}
+
 /// addPhysRegDeps - Add register dependencies (data, anti, and output) from
 /// this SUnit to following instructions in the same scheduling region that
 /// depend the physical register referenced at OperIdx.
@@ -336,7 +414,7 @@
         continue;
       if (DefSU != SU &&
           (Kind != SDep::Output || !MO.isDead() ||
-           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
+           !SURegDefIsDead(DefSU, *Alias))) {
         if (Kind == SDep::Anti)
           DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
         else {
@@ -366,24 +444,10 @@
     if (Uses.contains(Reg))
       Uses.eraseAll(Reg);
 
-    if (!MO.isDead()) {
+    if (!MO.isDead())
       Defs.eraseAll(Reg);
-    } else if (SU->isCall) {
-      // Calls will not be reordered because of chain dependencies (see
-      // below). Since call operands are dead, calls may continue to be added
-      // to the DefList making dependence checking quadratic in the size of
-      // the block. Instead, we leave only one call at the back of the
-      // DefList.
-      Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
-      Reg2SUnitsMap::iterator B = P.first;
-      Reg2SUnitsMap::iterator I = P.second;
-      for (bool isBegin = I == B; !isBegin; /* empty */) {
-        isBegin = (--I) == B;
-        if (!I->SU->isCall)
-          break;
-        I = Defs.erase(I);
-      }
-    }
+    else if (SU->isCall)
+      clearCallsInDefsForReg(Reg);
 
     // Defs are pushed in the order they are visited and never reordered.
     Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
@@ -542,6 +606,26 @@
   }
 }
 
+/// Return true if SU has a dead register def operand of Reg, or a
+/// regmask that clobbers it, without having a live def of it as well.
+bool ScheduleDAGInstrs::SURegDefIsDead(const SUnit *SU, unsigned Reg) {
+  assert(TRI->isPhysicalRegister(Reg));
+  bool hasDeadDef = false;
+  MachineInstr *MI = SU->getInstr();
+  for (const auto &I : MI->operands()) {
+    if (I.isRegMask() && I.clobbersPhysReg(Reg))
+      hasDeadDef = true;
+    else if (I.isReg() && I.isDef() && I.getReg() == Reg) {
+      if (I.isDead())
+        hasDeadDef = true;
+      else
+        return false;
+    }
+  }
+
+  return hasDeadDef;
+}
+
 /// Return true if MI is an instruction we are unable to reason about
 /// (like a call or something with unmodeled side effects).
 static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
@@ -948,6 +1032,8 @@
     bool HasVRegDef = false;
     for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
       const MachineOperand &MO = MI->getOperand(j);
+      if (MO.isRegMask())
+        addRegMaskDeps(SU, j);
       if (!MO.isReg()) continue;
       unsigned Reg = MO.getReg();
       if (Reg == 0) continue;
@@ -963,6 +1049,10 @@
          addVRegUseDeps(SU, j);
       }
     }
+
+    if (SU->isCall)
+      addCallDeps(SU);
+
     // If we haven't seen any uses in this scheduling region, create a
     // dependence edge to ExitSU to model the live-out latency. This is required
     // for vreg defs with no in-region use, and prefetches with no vreg def.
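
A quick way to exercise the change (a sketch, not part of the patch: test.ll is a placeholder input, and it assumes the machine scheduler is enabled for the target or forced with -enable-misched):

  # Default with the patch applied: calls are no longer treated as scheduling
  # boundaries (unless the target's isSchedulingBoundary() says otherwise), and
  # the DAG builder instead adds output dependencies for regmask-clobbered and
  # reserved registers.
  llc -enable-misched test.ll -o /dev/null

  # Force the old behaviour, i.e. keep calls as scheduling boundaries:
  llc -enable-misched -resched-calls=false test.ll -o /dev/null

In an assertions-enabled build, adding -debug-only=misched shows the resulting scheduling regions. Since RescheduleCalls is declared cl::Hidden, -resched-calls is only listed under llc -help-hidden.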