Index: include/llvm/CodeGen/ScheduleDAGInstrs.h =================================================================== --- include/llvm/CodeGen/ScheduleDAGInstrs.h +++ include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -248,11 +248,17 @@ void fixupKills(MachineBasicBlock *MBB); protected: void initSUnits(); + void clearCallsInDefsForReg(unsigned Reg); void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx); + void addRegMaskDeps(SUnit *SU, unsigned OperIdx); void addPhysRegDeps(SUnit *SU, unsigned OperIdx); void addVRegDefDeps(SUnit *SU, unsigned OperIdx); void addVRegUseDeps(SUnit *SU, unsigned OperIdx); + /// Return true if the MI of SU has a dead def of Reg, or clobbers it via a + /// regmask, without also having a live def of Reg. + bool SURegDefIsDead(const SUnit *SU, unsigned Reg); + /// \brief PostRA helper for rewriting kill flags. void startBlockForKills(MachineBasicBlock *BB); Index: include/llvm/Target/TargetSubtargetInfo.h =================================================================== --- include/llvm/Target/TargetSubtargetInfo.h +++ include/llvm/Target/TargetSubtargetInfo.h @@ -123,6 +123,10 @@ /// for preRA scheduling with the source level scheduler. virtual bool enableMachineSchedDefaultSched() const { return true; } + /// \brief True if the machine scheduler should not treat calls as + /// scheduling boundaries. + virtual bool MISchedRescheduleCalls() const { return false; } + /// \brief True if the subtarget should enable joining global copies. 
/// /// By default this is enabled if the machine scheduler is enabled, but Index: lib/CodeGen/MachineScheduler.cpp =================================================================== --- lib/CodeGen/MachineScheduler.cpp +++ lib/CodeGen/MachineScheduler.cpp @@ -81,6 +81,12 @@ static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden, cl::desc("Verify machine instrs before and after machine scheduling")); +static cl::opt<bool> RescheduleCalls( + "resched-calls", + cl::desc("Don't treat calls as scheduling boundaries in the machine " + "instruction scheduling pass."), cl::init(true), + cl::Hidden); + // DAG subtrees must have at least this many nodes. static const unsigned MinSubtreeSize = 8; @@ -377,19 +383,20 @@ /// Return true of the given instruction should not be included in a scheduling /// region. -/// -/// MachineScheduler does not currently support scheduling across calls. To -/// handle calls, the DAG builder needs to be modified to create register -/// anti/output dependencies on the registers clobbered by the call's regmask -/// operand. In PreRA scheduling, the stack pointer adjustment already prevents -/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce -/// the boundary, but there would be no benefit to postRA scheduling across -/// calls this late anyway. static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII) { - return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF); + // Calls may be rescheduled if demanded by CL option or by + // subtarget. + if (MI->isCall() && !MI->isTerminator()) { + if (RescheduleCalls.getNumOccurrences()) + return !RescheduleCalls; + else + return (!MF->getSubtarget().MISchedRescheduleCalls()); + } + + return TII->isSchedulingBoundary(MI, MBB, *MF); } /// Main driver for both MachineScheduler and PostMachineScheduler. 
Index: lib/CodeGen/ScheduleDAGInstrs.cpp =================================================================== --- lib/CodeGen/ScheduleDAGInstrs.cpp +++ lib/CodeGen/ScheduleDAGInstrs.cpp @@ -284,6 +284,58 @@ } } +void ScheduleDAGInstrs::clearCallsInDefsForReg(unsigned Reg) { + // Calls will not be reordered because of chain dependencies (see + // below). Since call operands are dead, calls may continue to be added + // to the DefList making dependence checking quadratic in the size of + // the block. Instead, we leave only one call at the back of the + // DefList. + Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg); + Reg2SUnitsMap::iterator B = P.first; + Reg2SUnitsMap::iterator I = P.second; + for (bool isBegin = I == B; !isBegin; /* empty */) { + isBegin = (--I) == B; + if (!I->SU->isCall) + break; + I = Defs.erase(I); + } +} + +/// addRegMaskDeps - Handle regmasks to be able to reschedule around +/// calls. +void ScheduleDAGInstrs::addRegMaskDeps(SUnit *SU, unsigned OperIdx) { + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); + + for (unsigned reg = 1; reg < TRI->getNumRegs(); reg++) { + if (MO.clobbersPhysReg(reg)) { + // Add output dependencies on all clobbered registers. Calls are + // expected to have register operands for in/out arguments, so + // they are not handled here. + for (MCRegAliasIterator Alias(reg, TRI, true); Alias.isValid(); ++Alias) { + if (!Defs.contains(*Alias)) + continue; + for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) { + SUnit *DefSU = I->SU; + if (DefSU == &ExitSU) + continue; + + // Don't add dependency to another dead def or another regmask. + bool defOp = DefSU->getInstr()->definesRegister(*Alias); + if (DefSU != SU && defOp && !SURegDefIsDead(DefSU, *Alias)) { + SDep Dep(SU, SDep::Output, /*Reg=*/*Alias); + DefSU->addPred(Dep); + } + } + } + + // Make sure the last seen call is in Defs. 
+ clearCallsInDefsForReg(reg); + Defs.insert(PhysRegSUOper(SU, -1, reg)); + } + } +} + /// addPhysRegDeps - Add register dependencies (data, anti, and output) from /// this SUnit to following instructions in the same scheduling region that /// depend the physical register referenced at OperIdx. @@ -307,8 +359,7 @@ if (DefSU == &ExitSU) continue; if (DefSU != SU && - (Kind != SDep::Output || !MO.isDead() || - !DefSU->getInstr()->registerDefIsDead(*Alias))) { + (Kind != SDep::Output || !MO.isDead() || !SURegDefIsDead(DefSU, *Alias))) { if (Kind == SDep::Anti) DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); else { @@ -338,24 +389,10 @@ if (Uses.contains(Reg)) Uses.eraseAll(Reg); - if (!MO.isDead()) { + if (!MO.isDead()) Defs.eraseAll(Reg); - } else if (SU->isCall) { - // Calls will not be reordered because of chain dependencies (see - // below). Since call operands are dead, calls may continue to be added - // to the DefList making dependence checking quadratic in the size of - // the block. Instead, we leave only one call at the back of the - // DefList. - Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg); - Reg2SUnitsMap::iterator B = P.first; - Reg2SUnitsMap::iterator I = P.second; - for (bool isBegin = I == B; !isBegin; /* empty */) { - isBegin = (--I) == B; - if (!I->SU->isCall) - break; - I = Defs.erase(I); - } - } + else if (SU->isCall) + clearCallsInDefsForReg(Reg); // Defs are pushed in the order they are visited and never reordered. Defs.insert(PhysRegSUOper(SU, OperIdx, Reg)); @@ -514,6 +551,26 @@ } } +/// Return true if SU has a dead register def operand of Reg, or a +/// regmask that clobbers it, without having a live def of it as well. 
+bool ScheduleDAGInstrs::SURegDefIsDead(const SUnit *SU, unsigned Reg) { + assert (TRI->isPhysicalRegister(Reg)); + bool hasDeadDef = false; + MachineInstr *MI = SU->getInstr(); + for (const auto &I : MI->operands()) { + if (I.isRegMask() && I.clobbersPhysReg(Reg)) + hasDeadDef = true; + else if (I.isReg() && I.isDef() && I.getReg() == Reg) { + if (I.isDead()) + hasDeadDef = true; + else + return false; + } + } + + return (hasDeadDef); +} + /// Return true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { @@ -911,6 +968,8 @@ bool HasVRegDef = false; for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { const MachineOperand &MO = MI->getOperand(j); + if (MO.isRegMask()) + addRegMaskDeps(SU, j); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue;