Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -53,6 +53,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetCallingConv.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/LiveInterval.h" #include #include #include @@ -3408,6 +3409,14 @@ return false; } + /// Target specific weight factor to the cost of allocating callee-saved + /// register for the first time in a function when defering the first + /// allocation of CSRs is prefered. The default value 0 means no change in + /// current behavior. + virtual unsigned getWeightFactorToTheFirstCSRAllocation() const { + return 0; + } + /// Lower TLS global address SDNode for target independent emulated TLS model. virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const; Index: lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- lib/CodeGen/RegAllocGreedy.cpp +++ lib/CodeGen/RegAllocGreedy.cpp @@ -66,6 +66,7 @@ #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -148,6 +149,7 @@ // Shortcuts to some useful interface. const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const TargetLowering *TLI; RegisterClassInfo RCI; // analyses @@ -369,6 +371,9 @@ static char ID; private: + /// Track if a CSR is allocated in the current function. + bool IsCSRAllocated; + unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl &, SmallVirtRegSet &, unsigned = 0); @@ -416,6 +421,7 @@ unsigned PhysReg, unsigned &CostPerUseLimit, SmallVectorImpl &NewVRegs); void initializeCSRCost(); + BlockFrequency getFirstTimeCSRCost(LiveInterval &VirtReg); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, @@ -2334,10 +2340,11 @@ SmallVectorImpl &NewVRegs) { if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { // We choose spill over using the CSR for the first time if the spill cost - // is lower than CSRCost. + // is lower than the CSR cost. SA->analyze(&VirtReg); - if (calcSpillCost() >= CSRCost) + if (calcSpillCost() >= getFirstTimeCSRCost(VirtReg)) { return PhysReg; + } // We are going to spill, set CostPerUseLimit to 1 to make sure that // we will not use a callee-saved register in tryEvict. @@ -2346,10 +2353,10 @@ } if (getStage(VirtReg) < RS_Split) { // We choose pre-splitting over using the CSR for the first time if - // the cost of splitting is lower than CSRCost. + // the cost of splitting is lower than the CSR cost. SA->analyze(&VirtReg); unsigned NumCands = 0; - BlockFrequency BestCost = CSRCost; // Don't modify CSRCost. + BlockFrequency BestCost = getFirstTimeCSRCost(VirtReg); unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, true /*IgnoreCSR*/); if (BestCand == NoCand) @@ -2368,6 +2375,45 @@ SetOfBrokenHints.remove(&LI); } +// Return the cost of the first use of a callee-saved register. For the first +// allocation from CSRs in the current function, increase the cost if the live +// range of the value spans basic blocks in which we'd prefer not to use one. +// This will often defer the first allocation from CSRs and give shrink-wrapping +// an opportunity to sink/hoist the save/restore from entry/exit blocks +// respectively. +BlockFrequency RAGreedy::getFirstTimeCSRCost(LiveInterval &VirtReg) { + if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) + return CSRCost; + + assert(getStage(VirtReg) < RS_Split && + "Unexpected stage to find the first time CSR cost." ); + + // Increase the CSR cost only for the first allocation from CSRs in the + // current function. + if (IsCSRAllocated) + return CSRCost; + + if (!VirtReg.isSpillable()) + return CSRCost; + + // Conservatively, we try to increase the CSR cost only when none of the + // blocks in the live range have call. + ArrayRef UseBlocks = SA->getUseBlocks(); + for (int i = 0, e = UseBlocks.size(); i < e; ++i) + for (auto &MI : *UseBlocks[i].MBB) + if (MI.isCall()) + return CSRCost; + + // Now, we prefer to defering the first allocation from CSRs. Try to increase + // the CSR cost by multipling the frequency of the entry block by the weight + // factor for the first allocation from CSRs. The weight factor approximates + // the number of copies which can be traded against the first spill of CSR in + // the entry block. + uint64_t EntryFreq = MBFI->getEntryFreq(); + return std::max(CSRCost.getFrequency(), + EntryFreq * TLI->getWeightFactorToTheFirstCSRAllocation()); +} + void RAGreedy::initializeCSRCost() { // We use the larger one out of the command-line option and the value report // by TRI. @@ -2391,6 +2437,8 @@ else // Can't use BranchProbability in general, since it takes 32-bit numbers. CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); + + IsCSRAllocated = false; } /// \brief Collect the hint info for \p Reg. @@ -2568,14 +2616,19 @@ // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. - if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && - NewVRegs.empty()) { + + if ((CSRCost.getFrequency() || (!IsCSRAllocated && VirtReg.isSpillable())) && + isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) { + unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, CostPerUseLimit, NewVRegs); - if (CSRReg || !NewVRegs.empty()) + if (CSRReg || !NewVRegs.empty()) { + if (CSRReg) + IsCSRAllocated = true; // Return now if we decide to use a CSR or create new vregs due to // pre-splitting. return CSRReg; + } } else return PhysReg; } @@ -2723,6 +2776,7 @@ MF = &mf; TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); + TLI = MF->getSubtarget().getTargetLowering(); RCI.runOnMachineFunction(mf); EnableLocalReassign = EnableLocalReassignment || Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -628,6 +628,9 @@ bool isVarArg) const override; bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override; + + + virtual unsigned getWeightFactorToTheFirstCSRAllocation() const override; }; namespace AArch64 { Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10788,3 +10788,11 @@ return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32; } + +// Weight factor to the CSR cost applied when defering the first allocation of +// CSR is prefered. This value approximate the number of copies which can be +// traded against the first spill of CSR in the entry block when defering the +// first allocation from CSRs is prefered. +unsigned AArch64TargetLowering::getWeightFactorToTheFirstCSRAllocation() const { + return 30; +}