Please use GitHub pull requests for new patches. Avoid migrating existing patches. Phabricator shutdown timeline
Changeset View
Standalone View
lib/CodeGen/RegAllocGreedy.cpp
Show First 20 Lines • Show All 60 Lines • ▼ Show 20 Lines | |||||
#include "llvm/Support/BlockFrequency.h" | #include "llvm/Support/BlockFrequency.h" | ||||
#include "llvm/Support/BranchProbability.h" | #include "llvm/Support/BranchProbability.h" | ||||
#include "llvm/Support/CommandLine.h" | #include "llvm/Support/CommandLine.h" | ||||
#include "llvm/Support/Debug.h" | #include "llvm/Support/Debug.h" | ||||
#include "llvm/Support/MathExtras.h" | #include "llvm/Support/MathExtras.h" | ||||
#include "llvm/Support/Timer.h" | #include "llvm/Support/Timer.h" | ||||
#include "llvm/Support/raw_ostream.h" | #include "llvm/Support/raw_ostream.h" | ||||
#include "llvm/Target/TargetInstrInfo.h" | #include "llvm/Target/TargetInstrInfo.h" | ||||
#include "llvm/Target/TargetLowering.h" | |||||
#include "llvm/Target/TargetMachine.h" | #include "llvm/Target/TargetMachine.h" | ||||
#include "llvm/Target/TargetRegisterInfo.h" | #include "llvm/Target/TargetRegisterInfo.h" | ||||
#include "llvm/Target/TargetSubtargetInfo.h" | #include "llvm/Target/TargetSubtargetInfo.h" | ||||
#include <algorithm> | #include <algorithm> | ||||
#include <cassert> | #include <cassert> | ||||
#include <cstdint> | #include <cstdint> | ||||
#include <memory> | #include <memory> | ||||
#include <queue> | #include <queue> | ||||
▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines | class RAGreedy : public MachineFunctionPass, | ||||
using SmallVirtRegSet = SmallSet<unsigned, 16>; | using SmallVirtRegSet = SmallSet<unsigned, 16>; | ||||
// context | // context | ||||
MachineFunction *MF; | MachineFunction *MF; | ||||
// Shortcuts to some useful interface. | // Shortcuts to some useful interface. | ||||
const TargetInstrInfo *TII; | const TargetInstrInfo *TII; | ||||
const TargetRegisterInfo *TRI; | const TargetRegisterInfo *TRI; | ||||
const TargetLowering *TLI; | |||||
RegisterClassInfo RCI; | RegisterClassInfo RCI; | ||||
// analyses | // analyses | ||||
SlotIndexes *Indexes; | SlotIndexes *Indexes; | ||||
MachineBlockFrequencyInfo *MBFI; | MachineBlockFrequencyInfo *MBFI; | ||||
MachineDominatorTree *DomTree; | MachineDominatorTree *DomTree; | ||||
MachineLoopInfo *Loops; | MachineLoopInfo *Loops; | ||||
MachineOptimizationRemarkEmitter *ORE; | MachineOptimizationRemarkEmitter *ORE; | ||||
▲ Show 20 Lines • Show All 252 Lines • ▼ Show 20 Lines | unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, | ||||
bool HasCompact, | bool HasCompact, | ||||
SmallVectorImpl<unsigned> &NewVRegs); | SmallVectorImpl<unsigned> &NewVRegs); | ||||
/// Check other options before using a callee-saved register for the first | /// Check other options before using a callee-saved register for the first | ||||
/// time. | /// time. | ||||
unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, | unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, | ||||
unsigned PhysReg, unsigned &CostPerUseLimit, | unsigned PhysReg, unsigned &CostPerUseLimit, | ||||
SmallVectorImpl<unsigned> &NewVRegs); | SmallVectorImpl<unsigned> &NewVRegs); | ||||
void initializeCSRCost(); | void initializeCSRCost(); | ||||
BlockFrequency getFirstTimeCSRCost(LiveInterval &VirtReg); | |||||
unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, | unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, | ||||
SmallVectorImpl<unsigned>&); | SmallVectorImpl<unsigned>&); | ||||
unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, | unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, | ||||
SmallVectorImpl<unsigned>&); | SmallVectorImpl<unsigned>&); | ||||
unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, | unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, | ||||
SmallVectorImpl<unsigned>&); | SmallVectorImpl<unsigned>&); | ||||
unsigned trySplit(LiveInterval&, AllocationOrder&, | unsigned trySplit(LiveInterval&, AllocationOrder&, | ||||
SmallVectorImpl<unsigned>&); | SmallVectorImpl<unsigned>&); | ||||
▲ Show 20 Lines • Show All 1,070 Lines • ▼ Show 20 Lines | while (unsigned PhysReg = Order.next()) { | ||||
// No live bundles, defer to splitSingleBlocks(). | // No live bundles, defer to splitSingleBlocks(). | ||||
if (!Cand.LiveBundles.any()) { | if (!Cand.LiveBundles.any()) { | ||||
DEBUG(dbgs() << " no bundles.\n"); | DEBUG(dbgs() << " no bundles.\n"); | ||||
continue; | continue; | ||||
} | } | ||||
Cost += calcGlobalSplitCost(Cand); | Cost += calcGlobalSplitCost(Cand); | ||||
DEBUG({ | DEBUG({ | ||||
dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) | dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) | ||||
<< " with bundles"; | << " with bundles"; | ||||
for (int i : Cand.LiveBundles.set_bits()) | for (int i : Cand.LiveBundles.set_bits()) | ||||
dbgs() << " EB#" << i; | dbgs() << " EB#" << i; | ||||
dbgs() << ".\n"; | dbgs() << ".\n"; | ||||
}); | }); | ||||
if (Cost < BestCost) { | if (Cost < BestCost) { | ||||
▲ Show 20 Lines • Show All 816 Lines • ▼ Show 20 Lines | |||||
/// to use the CSR; otherwise return 0. | /// to use the CSR; otherwise return 0. | ||||
unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, | unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, | ||||
AllocationOrder &Order, | AllocationOrder &Order, | ||||
unsigned PhysReg, | unsigned PhysReg, | ||||
unsigned &CostPerUseLimit, | unsigned &CostPerUseLimit, | ||||
SmallVectorImpl<unsigned> &NewVRegs) { | SmallVectorImpl<unsigned> &NewVRegs) { | ||||
if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { | if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { | ||||
// We choose spill over using the CSR for the first time if the spill cost | // We choose spill over using the CSR for the first time if the spill cost | ||||
// is lower than CSRCost. | // is lower than the CSR cost. | ||||
SA->analyze(&VirtReg); | SA->analyze(&VirtReg); | ||||
if (calcSpillCost() >= CSRCost) | if (calcSpillCost() >= getFirstTimeCSRCost(VirtReg)) | ||||
return PhysReg; | return PhysReg; | ||||
// We are going to spill, set CostPerUseLimit to 1 to make sure that | // We are going to spill, set CostPerUseLimit to 1 to make sure that | ||||
// we will not use a callee-saved register in tryEvict. | // we will not use a callee-saved register in tryEvict. | ||||
CostPerUseLimit = 1; | CostPerUseLimit = 1; | ||||
return 0; | return 0; | ||||
} | } | ||||
if (getStage(VirtReg) < RS_Split) { | if (getStage(VirtReg) < RS_Split) { | ||||
// We choose pre-splitting over using the CSR for the first time if | // We choose pre-splitting over using the CSR for the first time if | ||||
// the cost of splitting is lower than CSRCost. | // the cost of splitting is lower than the CSR cost. | ||||
SA->analyze(&VirtReg); | SA->analyze(&VirtReg); | ||||
unsigned NumCands = 0; | unsigned NumCands = 0; | ||||
BlockFrequency BestCost = CSRCost; // Don't modify CSRCost. | BlockFrequency BestCost = getFirstTimeCSRCost(VirtReg); | ||||
unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, | unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, | ||||
NumCands, true /*IgnoreCSR*/); | NumCands, true /*IgnoreCSR*/); | ||||
if (BestCand == NoCand) | if (BestCand == NoCand) | ||||
// Use the CSR if we can't find a region split below CSRCost. | // Use the CSR if we can't find a region split below CSRCost. | ||||
return PhysReg; | return PhysReg; | ||||
// Perform the actual pre-splitting. | // Perform the actual pre-splitting. | ||||
doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); | doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); | ||||
return 0; | return 0; | ||||
} | } | ||||
return PhysReg; | return PhysReg; | ||||
} | } | ||||
void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) { | void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) { | ||||
// Do not keep invalid information around. | // Do not keep invalid information around. | ||||
SetOfBrokenHints.remove(&LI); | SetOfBrokenHints.remove(&LI); | ||||
} | } | ||||
// Increase the cost for the first use of a callee-saved register if the live | |||||
// range of the value spans basic blocks in which we'd prefer not to use one. | |||||
// This will often defer use of a CSR and give shrink-wrapping an opportunity | |||||
// to sink/hoist the save/restore from entry/exit blocks respectively. | |||||
BlockFrequency RAGreedy::getFirstTimeCSRCost(LiveInterval &VirtReg) { | |||||
BlockFrequency BestCost = CSRCost; | |||||
if (TLI->useCSRInsteadOfSplit(VirtReg)) | |||||
return BestCost; | |||||
// Conservatively, we try to increase the CSR cost only when all blocks in | |||||
// the live range have no call. | |||||
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); | |||||
for (int i = 0, e = UseBlocks.size(); i < e; ++i) | |||||
for (auto &MI : *UseBlocks[i].MBB) | |||||
if (MI.isCall()) | |||||
return BestCost; | |||||
wmi: I don't see why the logic here is better than D32201. It will miss the motivational case we are… | |||||
Not Done ReplyInline ActionsHi Wei, This change was intended to make a process for the context-sensitive CSR cost model proposed in D27366. The case this change is targeting is different from D32201. I believe we can do this change on top of D32201 or this change can be treated differently from D32201. This change try to defer allocating CSR when we prefer not to use one in some context. For that I check if there is any user block which contain a call. If none of the blocks has call, then I said allocating a CSR is not preferred in that context. Then I increase CSRCost based on the frequency of the entry block. Like D32201, this change encourage more shrink-wrapping. In my experiment with spec2006/astar, one of the hot function was shrink-wrapped as we discourage the first allocation of CSR for a LR which do not expand across any call. junbuml: Hi Wei,
This change was intended to make a process for the context-sensitive CSR cost model… | |||||
// Now, we prefer to defering the first use of CSR. Try to increase the CSR | |||||
// cost by multipling the frequency of entry block with the number of tradable | |||||
// splits or spills. | |||||
uint64_t EntryFreq = MBFI->getEntryFreq(); | |||||
if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) | |||||
return std::max(BestCost.getFrequency(), | |||||
EntryFreq * TLI->getNumberOfTradableSpillsAgainstCSR()); | |||||
qcolombetUnsubmitted Not Done ReplyInline ActionsThat formula does not make sense to me. qcolombet: That formula does not make sense to me.
We should compare the cost of spilling vs. the CSRCost. | |||||
junbumlAuthorUnsubmitted Not Done ReplyInline ActionsIIUC, the spilt cost is the sum of frequencies of the copies at all the related blocks, which is calculated in the current cost model. Similarly, the CSR cost should be a cost for a spill of the CSR in the entry block. I think that why we decide the CSRCost based on the frequency of the entry block in initializeCSRCost(). junbuml: IIUC, the spilt cost is the sum of frequencies of the copies at all the related blocks, which… | |||||
else if (getStage(VirtReg) < RS_Split) | |||||
return std::max(BestCost.getFrequency(), | |||||
EntryFreq * TLI->getNumberOfTradableSplitsAgainstCSR()); | |||||
else | |||||
llvm_unreachable ("Unexpected stage to find the first time CSR cost."); | |||||
} | |||||
void RAGreedy::initializeCSRCost() { | void RAGreedy::initializeCSRCost() { | ||||
// We use the larger one out of the command-line option and the value report | // We use the larger one out of the command-line option and the value report | ||||
// by TRI. | // by TRI. | ||||
CSRCost = BlockFrequency( | CSRCost = BlockFrequency( | ||||
std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); | std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); | ||||
if (!CSRCost.getFrequency()) | if (!CSRCost.getFrequency()) | ||||
return; | return; | ||||
▲ Show 20 Lines • Show All 184 Lines • ▼ Show 20 Lines | unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, | ||||
unsigned Depth) { | unsigned Depth) { | ||||
unsigned CostPerUseLimit = ~0u; | unsigned CostPerUseLimit = ~0u; | ||||
// First try assigning a free register. | // First try assigning a free register. | ||||
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); | AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); | ||||
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { | if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { | ||||
// When NewVRegs is not empty, we may have made decisions such as evicting | // When NewVRegs is not empty, we may have made decisions such as evicting | ||||
// a virtual register, go with the earlier decisions and use the physical | // a virtual register, go with the earlier decisions and use the physical | ||||
// register. | // register. | ||||
if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && | if ((CSRCost.getFrequency() || !TLI->useCSRInsteadOfSplit(VirtReg)) && | ||||
NewVRegs.empty()) { | isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) { | ||||
unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, | unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, | ||||
CostPerUseLimit, NewVRegs); | CostPerUseLimit, NewVRegs); | ||||
if (CSRReg || !NewVRegs.empty()) | if (CSRReg || !NewVRegs.empty()) | ||||
// Return now if we decide to use a CSR or create new vregs due to | // Return now if we decide to use a CSR or create new vregs due to | ||||
// pre-splitting. | // pre-splitting. | ||||
return CSRReg; | return CSRReg; | ||||
} else | } else | ||||
return PhysReg; | return PhysReg; | ||||
▲ Show 20 Lines • Show All 137 Lines • ▼ Show 20 Lines | |||||
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { | bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { | ||||
DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" | DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" | ||||
<< "********** Function: " << mf.getName() << '\n'); | << "********** Function: " << mf.getName() << '\n'); | ||||
MF = &mf; | MF = &mf; | ||||
TRI = MF->getSubtarget().getRegisterInfo(); | TRI = MF->getSubtarget().getRegisterInfo(); | ||||
TII = MF->getSubtarget().getInstrInfo(); | TII = MF->getSubtarget().getInstrInfo(); | ||||
TLI = MF->getSubtarget().getTargetLowering(); | |||||
RCI.runOnMachineFunction(mf); | RCI.runOnMachineFunction(mf); | ||||
EnableLocalReassign = EnableLocalReassignment || | EnableLocalReassign = EnableLocalReassignment || | ||||
MF->getSubtarget().enableRALocalReassignment( | MF->getSubtarget().enableRALocalReassignment( | ||||
MF->getTarget().getOptLevel()); | MF->getTarget().getOptLevel()); | ||||
if (VerifyEnabled) | if (VerifyEnabled) | ||||
MF->verify(this, "Before greedy register allocator"); | MF->verify(this, "Before greedy register allocator"); | ||||
RegAllocBase::init(getAnalysis<VirtRegMap>(), | RegAllocBase::init(getAnalysis<VirtRegMap>(), | ||||
getAnalysis<LiveIntervals>(), | getAnalysis<LiveIntervals>(), | ||||
getAnalysis<LiveRegMatrix>()); | getAnalysis<LiveRegMatrix>()); | ||||
Indexes = &getAnalysis<SlotIndexes>(); | Indexes = &getAnalysis<SlotIndexes>(); | ||||
MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); | MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); | ||||
DomTree = &getAnalysis<MachineDominatorTree>(); | DomTree = &getAnalysis<MachineDominatorTree>(); | ||||
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); | ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); | ||||
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); | SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); | ||||
Loops = &getAnalysis<MachineLoopInfo>(); | Loops = &getAnalysis<MachineLoopInfo>(); | ||||
Bundles = &getAnalysis<EdgeBundles>(); | Bundles = &getAnalysis<EdgeBundles>(); | ||||
SpillPlacer = &getAnalysis<SpillPlacement>(); | SpillPlacer = &getAnalysis<SpillPlacement>(); | ||||
DebugVars = &getAnalysis<LiveDebugVariables>(); | DebugVars = &getAnalysis<LiveDebugVariables>(); | ||||
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); | ||||
initializeCSRCost(); | initializeCSRCost(); | ||||
calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); | calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); | ||||
DEBUG(LIS->dump()); | DEBUG(LIS->dump()); | ||||
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); | SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); | ||||
SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI)); | SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI)); | ||||
ExtraRegInfo.clear(); | ExtraRegInfo.clear(); | ||||
ExtraRegInfo.resize(MRI->getNumVirtRegs()); | ExtraRegInfo.resize(MRI->getNumVirtRegs()); | ||||
Show All 13 Lines |
I don't see why the logic here is better than D32201. It will miss the motivational case we are targeting. For a long live range of a function argument, which crosses call in a cold bb, we want to increase the first time CSRCost so that regalloc can choose to split the long live range before and after the call in the cold bb and use non-CSR register in the places other than the cold bb. As a result shrinkwrapping will be unblocked because after the splitting there will be less or no CSR use inside of entry/exit.