Please use GitHub pull requests for new patches. Avoid migrating existing patches. Phabricator shutdown timeline
Changeset View
Standalone View
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Show First 20 Lines • Show All 786 Lines • ▼ Show 20 Lines | for (const auto &Reg : FuncInfo->getLaneVGPRs()) { | ||||
if (!ScratchExecCopy) | if (!ScratchExecCopy) | ||||
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, | ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, | ||||
/*IsProlog*/ true); | /*IsProlog*/ true); | ||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, | buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, | ||||
*FI); | *FI); | ||||
} | } | ||||
for (auto ReservedWWM : FuncInfo->wwmAllocation()) { | |||||
if (!ScratchExecCopy) | |||||
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, | |||||
/*IsProlog*/ true); | |||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, | |||||
std::get<0>(ReservedWWM), std::get<1>(ReservedWWM)); | |||||
} | |||||
if (ScratchExecCopy) { | if (ScratchExecCopy) { | ||||
// FIXME: Split block and make terminator. | // FIXME: Split block and make terminator. | ||||
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; | unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; | ||||
MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; | MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; | ||||
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) | BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) | ||||
.addReg(ScratchExecCopy, RegState::Kill); | .addReg(ScratchExecCopy, RegState::Kill); | ||||
LiveRegs.addReg(ScratchExecCopy); | LiveRegs.addReg(ScratchExecCopy); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 244 Lines • ▼ Show 20 Lines | for (const auto &Reg : FuncInfo->getLaneVGPRs()) { | ||||
if (!ScratchExecCopy) | if (!ScratchExecCopy) | ||||
ScratchExecCopy = | ScratchExecCopy = | ||||
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); | buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); | ||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, | buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, | ||||
*FI); | *FI); | ||||
} | } | ||||
for (auto ReservedWWM : FuncInfo->wwmAllocation()) { | |||||
if (!ScratchExecCopy) | |||||
ScratchExecCopy = | |||||
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); | |||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, | |||||
std::get<0>(ReservedWWM), std::get<1>(ReservedWWM)); | |||||
} | |||||
if (ScratchExecCopy) { | if (ScratchExecCopy) { | ||||
// FIXME: Split block and make terminator. | // FIXME: Split block and make terminator. | ||||
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; | unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; | ||||
MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; | MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; | ||||
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) | BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) | ||||
.addReg(ScratchExecCopy, RegState::Kill); | .addReg(ScratchExecCopy, RegState::Kill); | ||||
} | } | ||||
} | } | ||||
Show All 31 Lines | void SIFrameLowering::processFunctionBeforeFrameFinalized( | ||||
MachineFrameInfo &MFI = MF.getFrameInfo(); | MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | ||||
const SIInstrInfo *TII = ST.getInstrInfo(); | const SIInstrInfo *TII = ST.getInstrInfo(); | ||||
const SIRegisterInfo *TRI = ST.getRegisterInfo(); | const SIRegisterInfo *TRI = ST.getRegisterInfo(); | ||||
MachineRegisterInfo &MRI = MF.getRegInfo(); | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||||
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); | SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); | ||||
if (!FuncInfo->isEntryFunction()) { | |||||
// Spill VGPRs used for Whole Wave Mode | |||||
FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI); | |||||
} | |||||
const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() | const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() | ||||
&& EnableSpillVGPRToAGPR; | && EnableSpillVGPRToAGPR; | ||||
if (SpillVGPRToAGPR) { | if (SpillVGPRToAGPR) { | ||||
// To track the spill frame indices handled in this pass. | // To track the spill frame indices handled in this pass. | ||||
BitVector SpillFIs(MFI.getObjectIndexEnd(), false); | BitVector SpillFIs(MFI.getObjectIndexEnd(), false); | ||||
BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false); | BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false); | ||||
▲ Show 20 Lines • Show All 126 Lines • ▼ Show 20 Lines | for (MachineBasicBlock &MBB : MF) { | ||||
for (MachineInstr &MI : MBB) { | for (MachineInstr &MI : MBB) { | ||||
// WRITELANE can overwrite the inactive lanes of VGPRs and callee must | // WRITELANE can overwrite the inactive lanes of VGPRs and callee must | ||||
// spill and restore them even if they are marked Caller-saved. | // spill and restore them even if they are marked Caller-saved. | ||||
if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32) | if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32) | ||||
MFI->addToLaneVGPRs(MF, MI.getOperand(0).getReg()); | MFI->addToLaneVGPRs(MF, MI.getOperand(0).getReg()); | ||||
} | } | ||||
} | } | ||||
// Add WWM reserved VGPRs. | |||||
for (Register Reg : MFI->getWWMReservedRegs()) { | |||||
const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); | |||||
MFI->addToLaneVGPRs(MF, Reg, TRI->getSpillSize(*RC), | |||||
arsenm: If we're just going to add these to lane VGPRs, is there any real reason to distinguish the WWM… | |||||
cdevadas (author): The separate list was maintained for the serialization info. Otherwise, we could add them directly into LaneVGPRs.
arsenm: The serialization is supposed to reflect whatever's here. It's not worth preserving for its own sake.
arsenm: allocateWWMSpill would be a better name.
TRI->getSpillAlign(*RC)); | |||||
arsenm: Theoretically we could want to call determineCalleeSaves multiple times, so I'm not sure accumulating state here is a good idea.
cdevadas (author): The best place to fill in the LaneVGPRs (both writelane and WWM reserved VGPRs) would be in processFunctionBeforeFrameFinalized. But that is invoked after determineCalleeSaves, where we mask these spills off the CSR list so that the default spill insertion in PEI would defer them, and we could later custom-insert them during FrameLowering. At the moment I don't see a better place to move it. If determineCalleeSaves needs to be called multiple times, we could add a condition so that this code runs only once.
arsenm: With the rename it's clearer what's going on here now. I'd prefer to keep the frame index creation in processFunctionBeforeFrameFinalized.
} | |||||
for (MachineBasicBlock &MBB : MF) { | for (MachineBasicBlock &MBB : MF) { | ||||
for (auto &Reg : MFI->getLaneVGPRs()) | for (auto &Reg : MFI->getLaneVGPRs()) | ||||
MBB.addLiveIn(Reg.first); | MBB.addLiveIn(Reg.first); | ||||
MBB.sortUniqueLiveIns(); | MBB.sortUniqueLiveIns(); | ||||
} | } | ||||
// Ignore the SGPRs the default implementation found. | // Ignore the SGPRs the default implementation found. | ||||
▲ Show 20 Lines • Show All 250 Lines • Show Last 20 Lines |
arsenm: If we're just going to add these to lane VGPRs, is there any real reason to distinguish the WWM registers? Could we just add them to LaneVGPRs to start with?