Index: include/llvm/CodeGen/MachineScheduler.h
===================================================================
--- include/llvm/CodeGen/MachineScheduler.h
+++ include/llvm/CodeGen/MachineScheduler.h
@@ -163,8 +163,12 @@
   // first.
   bool DisableLatencyHeuristic;
 
+  // If true, try to use instructions that can fold a reload of a reg.
+  bool FoldableReloadHeuristic;
+
   MachineSchedPolicy(): ShouldTrackPressure(false), ShouldTrackLaneMasks(false),
-    OnlyTopDown(false), OnlyBottomUp(false), DisableLatencyHeuristic(false) {}
+    OnlyTopDown(false), OnlyBottomUp(false), DisableLatencyHeuristic(false),
+    FoldableReloadHeuristic(false) {}
 };
 
 /// MachineSchedStrategy - Interface to the scheduling algorithm used by
@@ -769,7 +773,7 @@
   /// pickNodeBidirectional depends on these listed by decreasing priority.
   enum CandReason {
     NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax,
-    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
+    ResourceReduce, ResourceDemand, FoldReload, BotHeightReduce, BotPathReduce,
     TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
 
 #ifndef NDEBUG
Index: include/llvm/Target/TargetInstrInfo.h
===================================================================
--- include/llvm/Target/TargetInstrInfo.h
+++ include/llvm/Target/TargetInstrInfo.h
@@ -921,6 +921,13 @@
   }
 
 public:
+  /// Return true if MI has an equivalent instruction that instead
+  /// reads one source reg from memory. If \p reg is 0, return true
+  /// whenever such an equivalent instruction exists; otherwise also
+  /// check that \p reg is used in the foldable operand.
+  virtual bool hasFoldableOperand(const MachineInstr *MI,
+                                  unsigned reg = 0) const { return false; }
+
   /// unfoldMemoryOperand - Separate a single instruction which folded a load or
   /// a store or a load and a store into two or more instruction. If this is
   /// possible, returns true as well as the new instructions by reference.
Index: lib/CodeGen/CalcSpillWeights.cpp
===================================================================
--- lib/CodeGen/CalcSpillWeights.cpp
+++ lib/CodeGen/CalcSpillWeights.cpp
@@ -131,6 +131,7 @@
 VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
   MachineRegisterInfo &mri = MF.getRegInfo();
   const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo();
+  const TargetInstrInfo &tii = *MF.getSubtarget().getInstrInfo();
   MachineBasicBlock *mbb = nullptr;
   MachineLoop *loop = nullptr;
   bool isExiting = false;
@@ -170,6 +171,11 @@
     // Calculate instr weight.
     bool reads, writes;
     std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+    // If mi can be transformed to fold a reload of li.reg, the
+    // weight for reading becomes 0.
+    if (reads && tii.hasFoldableOperand(mi, li.reg))
+      reads = false;
+
     weight = LiveIntervals::getSpillWeight(
       writes, reads, &MBFI, mi);
 
Index: lib/CodeGen/MachineScheduler.cpp
===================================================================
--- lib/CodeGen/MachineScheduler.cpp
+++ lib/CodeGen/MachineScheduler.cpp
@@ -2376,6 +2376,7 @@
   case RegMax:         return "REG-MAX   ";
   case ResourceReduce: return "RES-REDUCE";
   case ResourceDemand: return "RES-DEMAND";
+  case FoldReload:     return "FOLDRELOAD";
   case TopDepthReduce: return "TOP-DEPTH ";
   case TopPathReduce:  return "TOP-PATH  ";
   case BotHeightReduce:return "BOT-HEIGHT";
@@ -2500,6 +2501,26 @@
   return false;
 }
 
+static bool tryFoldableReload(GenericSchedulerBase::SchedCandidate &TryCand,
+                              GenericSchedulerBase::SchedCandidate &Cand,
+                              SchedBoundary &Zone,
+                              const TargetInstrInfo *TII) {
+  bool CandReloadFoldable = TII->hasFoldableOperand(Cand.SU->getInstr());
+  bool TryCandReloadFoldable = TII->hasFoldableOperand(TryCand.SU->getInstr());
+
+  if (Zone.isTop()) {
+    if (tryLess(TryCandReloadFoldable, CandReloadFoldable,
+                TryCand, Cand, GenericSchedulerBase::FoldReload))
+      return true;
+  }
+  else {
+    if (tryGreater(TryCandReloadFoldable, CandReloadFoldable,
+                   TryCand, Cand, GenericSchedulerBase::FoldReload))
+      return true;
+  }
+  return false;
+}
+
 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
                       bool IsTop) {
   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
@@ -2834,6 +2855,14 @@
                  TryCand, Cand, ResourceDemand))
     return;
 
+  // Try to place an instruction lower in the final schedule if it
+  // doesn't mind having a source reg spilled, i.e. it can fold a
+  // reload of it. This makes the source operands more likely to be
+  // spilled than the def operand.
+  if (RegionPolicy.FoldableReloadHeuristic &&
+      tryFoldableReload(TryCand, Cand, Zone, DAG->TII))
+    return;
+
   // Avoid serializing long latency dependence chains.
   // For acyclic path limited loops, latency was already checked above.
   if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency &&
Index: lib/Target/SystemZ/SystemZInstrInfo.h
===================================================================
--- lib/Target/SystemZ/SystemZInstrInfo.h
+++ lib/Target/SystemZ/SystemZInstrInfo.h
@@ -192,6 +192,8 @@
                                       MachineBasicBlock::iterator InsertPt,
                                       MachineInstr *LoadMI) const override;
   bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override;
+  bool hasFoldableOperand(const MachineInstr *MI, unsigned reg = 0) const
+    override;
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
     override;
Index: lib/Target/SystemZ/SystemZInstrInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -359,6 +359,27 @@
 }
 
 bool SystemZInstrInfo::
+hasFoldableOperand(const MachineInstr *MI, unsigned reg) const {
+  if (SystemZ::getMemOpcode(MI->getOpcode()) == -1)
+    return false;
+
+  // If MI is mapped to a memory opcode, it can fold one of its
+  // operands in case that operand's register gets spilled. If reg is
+  // 0, we don't know which operand might be spilled, but the misched
+  // tryCandidate() can still help generally by placing MI lower in
+  // the final schedule.
+  if (!reg)
+    return true;
+
+  // If reg is given, check that it is used in the foldable (last)
+  // operand. CalcSpillWeights will in this case decrease the cost
+  // estimate for spilling the register.
+  unsigned NumOps = MI->getNumExplicitOperands();
+  const MachineOperand &MO = MI->getOperand(NumOps - 1);
+  return (MO.isReg() && MO.getReg() == reg);
+}
+
+bool SystemZInstrInfo::
 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   assert(Cond.size() == 2 && "Invalid condition");
   Cond[1].setImm(Cond[1].getImm() ^ Cond[0].getImm());
@@ -863,7 +884,7 @@
   }
 
   // If the spilled operand is the final one, try to change <INSN>R
-  // into <INSN>.
+  // into <INSN>. TODO: Is it possible to commute instructions here?
   int MemOpcode = SystemZ::getMemOpcode(Opcode);
   if (MemOpcode >= 0) {
     unsigned NumOps = MI->getNumExplicitOperands();
Index: lib/Target/SystemZ/SystemZSubtarget.h
===================================================================
--- lib/Target/SystemZ/SystemZSubtarget.h
+++ lib/Target/SystemZ/SystemZSubtarget.h
@@ -73,6 +73,10 @@
     return &TSInfo;
   }
 
+  void overrideSchedPolicy(MachineSchedPolicy &Policy,
+                           MachineInstr *begin, MachineInstr *end,
+                           unsigned NumRegionInstrs) const override;
+
   // This is important for reducing register pressure in vector code.
   bool useAA() const override { return true; }
 
Index: lib/Target/SystemZ/SystemZSubtarget.cpp
===================================================================
--- lib/Target/SystemZ/SystemZSubtarget.cpp
+++ lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -10,9 +10,15 @@
 #include "SystemZSubtarget.h"
 #include "MCTargetDesc/SystemZMCTargetDesc.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/CodeGen/MachineScheduler.h"
 
 using namespace llvm;
 
+static cl::opt<bool>
+FoldableReloadHeuristic("foldable-reloads", cl::Hidden,
+  cl::desc("Consider reg->memory opcodes during machine scheduling"),
+  cl::init(true));
+
 #define DEBUG_TYPE "systemz-subtarget"
 
 #define GET_SUBTARGETINFO_TARGET_DESC
@@ -70,3 +76,17 @@
   // case isn't easy to detect.
   return false;
 }
+
+void SystemZSubtarget::
+overrideSchedPolicy(MachineSchedPolicy &Policy,
+                    MachineInstr *begin, MachineInstr *end,
+                    unsigned NumRegionInstrs) const
+{
+  // Bidirectional scheduling pre-RA is beneficial according to benchmarks.
+  Policy.OnlyTopDown = false;
+  Policy.OnlyBottomUp = false;
+
+  // Enable the heuristic for foldable reloads, i.e. prefer to spill a
+  // register if it is read by an instruction that can fold the reload.
+  Policy.FoldableReloadHeuristic = FoldableReloadHeuristic;
+}
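
For illustration only (this note is not part of the patch): the heuristic
leans on the existing SystemZ::getMemOpcode() mapping, i.e. the same table
that foldMemoryOperandImpl() consults when the register allocator actually
spills. Assuming the usual pairing of AR (add register) with A (add from
memory), spilling the final source operand lets the fold turn

    AR %r1, %r2             # add register, register

into

    A  %r1, <spill slot>    # add from memory; the reload is folded

Since foldMemoryOperandImpl() only performs this rewrite when the spilled
operand is the final explicit one, hasFoldableOperand() likewise only
accepts a reg that appears in MI->getOperand(NumOps - 1).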