Changeset View
Standalone View
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Show All 20 Lines | |||||
#define DEBUG_TYPE "frame-info" | #define DEBUG_TYPE "frame-info" | ||||
static cl::opt<bool> EnableSpillVGPRToAGPR( | static cl::opt<bool> EnableSpillVGPRToAGPR( | ||||
"amdgpu-spill-vgpr-to-agpr", | "amdgpu-spill-vgpr-to-agpr", | ||||
cl::desc("Enable spilling VGPRs to AGPRs"), | cl::desc("Enable spilling VGPRs to AGPRs"), | ||||
cl::ReallyHidden, | cl::ReallyHidden, | ||||
cl::init(true)); | cl::init(true)); | ||||
// Find a register matching \p RC from \p LiveRegs which is unused and available | |||||
// throughout the function. On failure, returns AMDGPU::NoRegister. | |||||
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, | |||||
sebastian-ne: This function doesn’t do anything with CSRs, although the name says so? | |||||
cdevadas: You're right. Neither scratch nor CSR should be in the name. I will change it. | |||||
const LivePhysRegs &LiveRegs, | |||||
const TargetRegisterClass &RC) { | |||||
for (MCRegister Reg : RC) { | |||||
if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg)) | |||||
return Reg; | |||||
} | |||||
return MCRegister(); | |||||
} | |||||
// Find a scratch register that we can use in the prologue. We avoid using | // Find a scratch register that we can use in the prologue. We avoid using | ||||
// callee-save registers since they may appear to be free when this is called | // callee-save registers since they may appear to be free when this is called | ||||
// from canUseAsPrologue (during shrink wrapping), but then no longer be free | // from canUseAsPrologue (during shrink wrapping), but then no longer be free | ||||
// when this is called from emitPrologue. | // when this is called from emitPrologue. | ||||
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, | static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, | ||||
LivePhysRegs &LiveRegs, | LivePhysRegs &LiveRegs, | ||||
const TargetRegisterClass &RC, | const TargetRegisterClass &RC, | ||||
bool Unused = false) { | bool Unused = false) { | ||||
// Mark callee saved registers as used so we will not choose them. | // Mark callee saved registers as used so we will not choose them. | ||||
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); | const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); | ||||
for (unsigned i = 0; CSRegs[i]; ++i) | for (unsigned i = 0; CSRegs[i]; ++i) | ||||
LiveRegs.addReg(CSRegs[i]); | LiveRegs.addReg(CSRegs[i]); | ||||
if (Unused) { | if (Unused) { | ||||
// We are looking for a register that can be used throughout the entire | // We are looking for a register that can be used throughout the entire | ||||
// function, so any use is unacceptable. | // function, so any use is unacceptable. | ||||
for (MCRegister Reg : RC) { | return findUnusedRegister(MRI, LiveRegs, RC); | ||||
if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg)) | |||||
return Reg; | |||||
} | |||||
} else { | } else { | ||||
arsenm: No else after return | |||||
for (MCRegister Reg : RC) { | for (MCRegister Reg : RC) { | ||||
if (LiveRegs.available(MRI, Reg)) | if (LiveRegs.available(MRI, Reg)) | ||||
return Reg; | return Reg; | ||||
} | } | ||||
} | } | ||||
return MCRegister(); | return MCRegister(); | ||||
} | } | ||||
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, | static void getVGPRSpillLaneOrTempRegister( | ||||
LivePhysRegs &LiveRegs, | MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR, | ||||
Register &TempSGPR, | const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass) { | ||||
Optional<int> &FrameIndex, | |||||
bool IsFP) { | |||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); | ||||
MachineFrameInfo &FrameInfo = MF.getFrameInfo(); | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); | ||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | ||||
const SIRegisterInfo *TRI = ST.getRegisterInfo(); | const SIRegisterInfo *TRI = ST.getRegisterInfo(); | ||||
unsigned Size = TRI->getSpillSize(RC); | |||||
Align Alignment = TRI->getSpillAlign(RC); | |||||
// We need to save and restore the current FP/BP. | // We need to save and restore the given SGPR. | ||||
// 1: Try to save the FP/BP in an unused SGPR. | // 1: Try to save the given register into an unused scratch SGPR. The LiveRegs | ||||
TempSGPR = findScratchNonCalleeSaveRegister( | // should have all the callee saved registers marked as used. | ||||
MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true); | Register ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC); | ||||
if (!TempSGPR) { | if (!ScratchSGPR) { | ||||
int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr, | int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr, | ||||
TargetStackID::SGPRSpill); | TargetStackID::SGPRSpill); | ||||
if (TRI->spillSGPRToVGPR() && | if (TRI->spillSGPRToVGPR() && | ||||
MFI->allocateSGPRSpillToVGPR(MF, NewFI, /* IsPEI */ true)) { | MFI->allocateSGPRSpillToVGPR(MF, FI, /* IsPEI */ true)) { | ||||
// 2: There's no free lane to spill, and no free register to save FP/BP, | // 2: There's no free lane to spill, and no free register to save the | ||||
// so we're forced to spill another VGPR to use for the spill. | // SGPR, so we're forced to take another VGPR to use for the spill. | ||||
FrameIndex = NewFI; | MFI->addToCustomSGPRSpills( | ||||
SGPR, CustomSGPRSaveInfo(SGPRSaveKind::SPILL_TO_VGPR_LANE, FI)); | |||||
LLVM_DEBUG( | |||||
auto Spill = MFI->getSGPRToVGPRCustomSpills(NewFI).front(); | LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRCustomSpills(FI).front(); | ||||
dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to " | dbgs() << printReg(SGPR, TRI) << " requires fallback spill to " | ||||
<< printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';); | << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane | ||||
<< '\n';); | |||||
} else { | } else { | ||||
// Remove dead <NewFI> index | // Remove dead <FI> index | ||||
sebastian-ne: Comment should say <FI> for the renamed variable. | |||||
cdevadas: Thanks. Will change it. | |||||
MF.getFrameInfo().RemoveStackObject(NewFI); | MF.getFrameInfo().RemoveStackObject(FI); | ||||
// 3: If all else fails, spill the FP/BP to memory. | // 3: If all else fails, spill the register to memory. | ||||
FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4)); | FI = FrameInfo.CreateSpillStackObject(Size, Alignment); | ||||
LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling " | MFI->addToCustomSGPRSpills( | ||||
<< (IsFP ? "FP" : "BP") << '\n'); | SGPR, CustomSGPRSaveInfo(SGPRSaveKind::SPILL_TO_MEM, FI)); | ||||
LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling " | |||||
<< printReg(SGPR, TRI) << '\n'); | |||||
} | } | ||||
} else { | } else { | ||||
LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to " | MFI->addToCustomSGPRSpills( | ||||
<< printReg(TempSGPR, TRI) << '\n'); | SGPR, | ||||
CustomSGPRSaveInfo(SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR)); | |||||
LiveRegs.addReg(ScratchSGPR); | |||||
LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to " | |||||
<< printReg(ScratchSGPR, TRI) << '\n'); | |||||
} | } | ||||
} | } | ||||
// We need to specially emit stack operations here because a different frame | // We need to specially emit stack operations here because a different frame | ||||
// register is used than in the rest of the function, as getFrameRegister would | // register is used than in the rest of the function, as getFrameRegister would | ||||
// use. | // use. | ||||
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, | static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, | ||||
const SIMachineFunctionInfo &FuncInfo, | const SIMachineFunctionInfo &FuncInfo, | ||||
LivePhysRegs &LiveRegs, MachineFunction &MF, | LivePhysRegs &LiveRegs, MachineFunction &MF, | ||||
MachineBasicBlock &MBB, | MachineBasicBlock &MBB, | ||||
MachineBasicBlock::iterator I, const DebugLoc &DL, | MachineBasicBlock::iterator I, const DebugLoc &DL, | ||||
Register SpillReg, int FI) { | Register SpillReg, int FI, int64_t DwordOff = 0) { | ||||
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR | unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR | ||||
: AMDGPU::BUFFER_STORE_DWORD_OFFSET; | : AMDGPU::BUFFER_STORE_DWORD_OFFSET; | ||||
MachineFrameInfo &FrameInfo = MF.getFrameInfo(); | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); | ||||
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); | MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); | ||||
MachineMemOperand *MMO = MF.getMachineMemOperand( | MachineMemOperand *MMO = MF.getMachineMemOperand( | ||||
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI), | PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI), | ||||
FrameInfo.getObjectAlign(FI)); | FrameInfo.getObjectAlign(FI)); | ||||
LiveRegs.addReg(SpillReg); | LiveRegs.addReg(SpillReg); | ||||
TRI.buildSpillLoadStore( | TRI.buildSpillLoadStore( | ||||
MBB, I, DL, Opc, FI, SpillReg, !MBB.isLiveIn(SpillReg), | MBB, I, DL, Opc, FI, SpillReg, !MBB.isLiveIn(SpillReg), | ||||
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, &LiveRegs); | FuncInfo.getStackPtrOffsetReg(), DwordOff, MMO, nullptr, &LiveRegs); | ||||
LiveRegs.removeReg(SpillReg); | LiveRegs.removeReg(SpillReg); | ||||
} | } | ||||
static void buildEpilogRestore(const GCNSubtarget &ST, | static void buildEpilogRestore( | ||||
const SIRegisterInfo &TRI, | const GCNSubtarget &ST, const SIRegisterInfo &TRI, | ||||
const SIMachineFunctionInfo &FuncInfo, | const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, | ||||
LivePhysRegs &LiveRegs, MachineFunction &MF, | MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, | ||||
MachineBasicBlock &MBB, | const DebugLoc &DL, Register SpillReg, int FI, int64_t DwordOff = 0) { | ||||
MachineBasicBlock::iterator I, | |||||
const DebugLoc &DL, Register SpillReg, int FI) { | |||||
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR | unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR | ||||
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET; | : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; | ||||
MachineFrameInfo &FrameInfo = MF.getFrameInfo(); | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); | ||||
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); | MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); | ||||
MachineMemOperand *MMO = MF.getMachineMemOperand( | MachineMemOperand *MMO = MF.getMachineMemOperand( | ||||
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI), | PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI), | ||||
FrameInfo.getObjectAlign(FI)); | FrameInfo.getObjectAlign(FI)); | ||||
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, | TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, | ||||
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, | FuncInfo.getStackPtrOffsetReg(), DwordOff, MMO, | ||||
&LiveRegs); | nullptr, &LiveRegs); | ||||
} | } | ||||
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, | static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, | ||||
const DebugLoc &DL, const SIInstrInfo *TII, | const DebugLoc &DL, const SIInstrInfo *TII, | ||||
Register TargetReg) { | Register TargetReg) { | ||||
MachineFunction *MF = MBB.getParent(); | MachineFunction *MF = MBB.getParent(); | ||||
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); | const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); | ||||
const SIRegisterInfo *TRI = &TII->getRegisterInfo(); | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); | ||||
Show All 11 Lines | static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, | ||||
} | } | ||||
Register GitPtrLo = MFI->getGITPtrLoReg(*MF); | Register GitPtrLo = MFI->getGITPtrLoReg(*MF); | ||||
MF->getRegInfo().addLiveIn(GitPtrLo); | MF->getRegInfo().addLiveIn(GitPtrLo); | ||||
MBB.addLiveIn(GitPtrLo); | MBB.addLiveIn(GitPtrLo); | ||||
BuildMI(MBB, I, DL, SMovB32, TargetLo) | BuildMI(MBB, I, DL, SMovB32, TargetLo) | ||||
.addReg(GitPtrLo); | .addReg(GitPtrLo); | ||||
} | } | ||||
static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI, | |||||
const SIMachineFunctionInfo *FuncInfo, | |||||
MachineFunction &MF, MachineBasicBlock &MBB, | |||||
MachineBasicBlock::iterator MBBI, bool IsProlog) { | |||||
if (LiveRegs.empty()) { | |||||
LiveRegs.init(TRI); | |||||
if (IsProlog) { | |||||
LiveRegs.addLiveIns(MBB); | |||||
arsenm: Really we ought to be stepping back from the end of the entry block and always using reverse liveness (but I guess you're just moving this function, so that's a separate change). | |||||
} else { | |||||
// In epilog. | |||||
LiveRegs.addLiveOuts(MBB); | |||||
LiveRegs.stepBackward(*MBBI); | |||||
} | |||||
} | |||||
} | |||||
namespace llvm { | |||||
// SpillBuilder for saving/restoring custom SGPR spills. | |||||
// Custom spills are those special SGPR spills delayed until the current | |||||
// function's frame is finalized. The spills for FP, BP, etc. come under this | |||||
// category. For a given register, the builder uses the CustomSGPRSaveInfo to | |||||
// decide the spill method. | |||||
class CustomSGPRSpillBuilder { | |||||
MachineBasicBlock::iterator MI; | |||||
MachineBasicBlock &MBB; | |||||
MachineFunction &MF; | |||||
const GCNSubtarget &ST; | |||||
MachineFrameInfo &MFI; | |||||
SIMachineFunctionInfo *FuncInfo; | |||||
const SIInstrInfo *TII; | |||||
const SIRegisterInfo &TRI; | |||||
Register SuperReg; | |||||
const CustomSGPRSaveInfo SI; | |||||
LivePhysRegs &LiveRegs; | |||||
const DebugLoc &DL; | |||||
ArrayRef<int16_t> SplitParts; | |||||
unsigned NumSubRegs; | |||||
unsigned EltSize = 4; | |||||
void saveToMemory(const int FI) const { | |||||
MachineRegisterInfo &MRI = MF.getRegInfo(); | |||||
assert(!MFI.isDeadObjectIndex(FI)); | |||||
initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true); | |||||
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( | |||||
MRI, LiveRegs, AMDGPU::VGPR_32RegClass); | |||||
if (!TmpVGPR) | |||||
report_fatal_error("failed to find free scratch register"); | |||||
for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) { | |||||
Register SubReg = NumSubRegs == 1 | |||||
? SuperReg | |||||
: Register(TRI.getSubReg(SuperReg, SplitParts[I])); | |||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) | |||||
.addReg(SubReg); | |||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR, | |||||
FI, DwordOff); | |||||
DwordOff += 4; | |||||
} | |||||
} | |||||
void saveToVGPRLane(const int FI) const { | |||||
assert(!MFI.isDeadObjectIndex(FI)); | |||||
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); | |||||
ArrayRef<SIRegisterInfo::SpilledReg> Spill = | |||||
FuncInfo->getSGPRToVGPRCustomSpills(FI); | |||||
assert(Spill.size() == NumSubRegs); | |||||
for (unsigned I = 0; I < NumSubRegs; ++I) { | |||||
Register SubReg = NumSubRegs == 1 | |||||
? SuperReg | |||||
: Register(TRI.getSubReg(SuperReg, SplitParts[I])); | |||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[I].VGPR) | |||||
.addReg(SubReg) | |||||
.addImm(Spill[I].Lane) | |||||
.addReg(Spill[I].VGPR, RegState::Undef); | |||||
} | |||||
} | |||||
void copyToScratchSGPR(Register DstReg) const { | |||||
sebastian-ne: Why does this code split the copies into SubRegs and copyFromScratchSGPR doesn’t? | |||||
cdevadas: For the purpose of emitting CFI directives for each copy. There is no CFI emitted in the epilogue. | |||||
sebastian-ne: Hm, does that mean CFI cannot represent register pairs? If so, can we emit a single copy… | |||||
cdevadas: Sure, we will get the benefit for 64-bit copies as we have the 'S_MOV_B64' instruction. But for the CFI emission, we need composite expressions for register pairs. @scott.linder to comment about its impact. | |||||
scott.linder: I agree with using the most efficient representation of the copy (sounds like a 64-bit MOV is best) and generating whatever CFI we need to describe it. In the case of the 64-bit program address space PC and the wave64 EXEC this will be a pair of SGPRs, described via a composite expression in the CFI, like how CD mentions. For every other case we can generate independent CFI instructions to separately describe the SubRegs which correspond to DWARF registers. For example, we don't have a DWARF register number for the pair SGPR40_SGPR41, whereas we do for SGPR40 and SGPR41 separately, so we can still emit something along the lines of: $sgpr40_sgpr41 = S_MOV_B64 $sgpr50_sgpr51 | |||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg) | |||||
.addReg(SuperReg) | |||||
.setMIFlag(MachineInstr::FrameSetup); | |||||
} | |||||
void restoreFromMemory(const int FI) { | |||||
MachineRegisterInfo &MRI = MF.getRegInfo(); | |||||
initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false); | |||||
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( | |||||
MRI, LiveRegs, AMDGPU::VGPR_32RegClass); | |||||
if (!TmpVGPR) | |||||
report_fatal_error("failed to find free scratch register"); | |||||
for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) { | |||||
Register SubReg = NumSubRegs == 1 | |||||
? SuperReg | |||||
: Register(TRI.getSubReg(SuperReg, SplitParts[I])); | |||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR, | |||||
FI, DwordOff); | |||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) | |||||
.addReg(TmpVGPR, RegState::Kill); | |||||
DwordOff += 4; | |||||
} | |||||
} | |||||
void restoreFromVGPRLane(const int FI) { | |||||
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); | |||||
ArrayRef<SIRegisterInfo::SpilledReg> Spill = | |||||
FuncInfo->getSGPRToVGPRCustomSpills(FI); | |||||
assert(Spill.size() == NumSubRegs); | |||||
for (unsigned I = 0; I < NumSubRegs; ++I) { | |||||
Register SubReg = NumSubRegs == 1 | |||||
? SuperReg | |||||
: Register(TRI.getSubReg(SuperReg, SplitParts[I])); | |||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) | |||||
.addReg(Spill[I].VGPR) | |||||
.addImm(Spill[I].Lane); | |||||
} | |||||
} | |||||
void copyFromScratchSGPR(Register SrcReg) const { | |||||
BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg) | |||||
.addReg(SrcReg) | |||||
.setMIFlag(MachineInstr::FrameDestroy); | |||||
} | |||||
public: | |||||
CustomSGPRSpillBuilder(Register Reg, const CustomSGPRSaveInfo SI, | |||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, | |||||
const DebugLoc &DL, const SIInstrInfo *TII, | |||||
const SIRegisterInfo &TRI, LivePhysRegs &LiveRegs) | |||||
: MI(MI), MBB(MBB), MF(*MBB.getParent()), | |||||
ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()), | |||||
FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI), | |||||
SuperReg(Reg), SI(SI), LiveRegs(LiveRegs), DL(DL) { | |||||
const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg); | |||||
SplitParts = TRI.getRegSplitParts(RC, EltSize); | |||||
NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); | |||||
assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); | |||||
} | |||||
void save() { | |||||
switch (SI.getKind()) { | |||||
case SGPRSaveKind::SPILL_TO_MEM: | |||||
return saveToMemory(SI.getIndex()); | |||||
case SGPRSaveKind::SPILL_TO_VGPR_LANE: | |||||
return saveToVGPRLane(SI.getIndex()); | |||||
case SGPRSaveKind::COPY_TO_SCRATCH_SGPR: | |||||
return copyToScratchSGPR(SI.getReg()); | |||||
} | |||||
} | |||||
void restore() { | |||||
switch (SI.getKind()) { | |||||
case SGPRSaveKind::SPILL_TO_MEM: | |||||
return restoreFromMemory(SI.getIndex()); | |||||
case SGPRSaveKind::SPILL_TO_VGPR_LANE: | |||||
return restoreFromVGPRLane(SI.getIndex()); | |||||
case SGPRSaveKind::COPY_TO_SCRATCH_SGPR: | |||||
return copyFromScratchSGPR(SI.getReg()); | |||||
} | |||||
} | |||||
}; | |||||
} // namespace llvm | |||||
// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()` | // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()` | ||||
void SIFrameLowering::emitEntryFunctionFlatScratchInit( | void SIFrameLowering::emitEntryFunctionFlatScratchInit( | ||||
MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, | MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, | ||||
const DebugLoc &DL, Register ScratchWaveOffsetReg) const { | const DebugLoc &DL, Register ScratchWaveOffsetReg) const { | ||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | ||||
const SIInstrInfo *TII = ST.getInstrInfo(); | const SIInstrInfo *TII = ST.getInstrInfo(); | ||||
const SIRegisterInfo *TRI = &TII->getRegisterInfo(); | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); | ||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); | ||||
▲ Show 20 Lines • Show All 483 Lines • ▼ Show 20 Lines | case TargetStackID::SGPRSpill: | ||||
return true; | return true; | ||||
case TargetStackID::ScalableVector: | case TargetStackID::ScalableVector: | ||||
case TargetStackID::WasmLocal: | case TargetStackID::WasmLocal: | ||||
return false; | return false; | ||||
} | } | ||||
llvm_unreachable("Invalid TargetStackID::Value"); | llvm_unreachable("Invalid TargetStackID::Value"); | ||||
} | } | ||||
static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI, | |||||
const SIMachineFunctionInfo *FuncInfo, | |||||
MachineFunction &MF, MachineBasicBlock &MBB, | |||||
MachineBasicBlock::iterator MBBI, bool IsProlog) { | |||||
if (LiveRegs.empty()) { | |||||
LiveRegs.init(TRI); | |||||
if (IsProlog) { | |||||
LiveRegs.addLiveIns(MBB); | |||||
} else { | |||||
// In epilog. | |||||
LiveRegs.addLiveOuts(MBB); | |||||
LiveRegs.stepBackward(*MBBI); | |||||
} | |||||
} | |||||
} | |||||
// Activate all lanes, returns saved exec. | // Activate all lanes, returns saved exec. | ||||
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, | static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, | ||||
MachineFunction &MF, | MachineFunction &MF, | ||||
MachineBasicBlock &MBB, | MachineBasicBlock &MBB, | ||||
MachineBasicBlock::iterator MBBI, | MachineBasicBlock::iterator MBBI, | ||||
const DebugLoc &DL, bool IsProlog) { | const DebugLoc &DL, bool IsProlog) { | ||||
Register ScratchExecCopy; | Register ScratchExecCopy; | ||||
MachineRegisterInfo &MRI = MF.getRegInfo(); | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||||
Show All 15 Lines | const unsigned OrSaveExec = | ||||
ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; | ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; | ||||
auto SaveExec = BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy) | auto SaveExec = BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy) | ||||
.addImm(-1); | .addImm(-1); | ||||
SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead. | SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead. | ||||
return ScratchExecCopy; | return ScratchExecCopy; | ||||
} | } | ||||
// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR. | |||||
// Otherwise we are spilling to memory. | |||||
static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) { | |||||
const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||||
return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill; | |||||
} | |||||
void SIFrameLowering::emitPrologue(MachineFunction &MF, | void SIFrameLowering::emitPrologue(MachineFunction &MF, | ||||
MachineBasicBlock &MBB) const { | MachineBasicBlock &MBB) const { | ||||
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); | SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); | ||||
if (FuncInfo->isEntryFunction()) { | if (FuncInfo->isEntryFunction()) { | ||||
emitEntryFunctionPrologue(MF, MBB); | emitEntryFunctionPrologue(MF, MBB); | ||||
return; | return; | ||||
} | } | ||||
MachineFrameInfo &MFI = MF.getFrameInfo(); | MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
MachineRegisterInfo &MRI = MF.getRegInfo(); | |||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | ||||
const SIInstrInfo *TII = ST.getInstrInfo(); | const SIInstrInfo *TII = ST.getInstrInfo(); | ||||
const SIRegisterInfo &TRI = TII->getRegisterInfo(); | const SIRegisterInfo &TRI = TII->getRegisterInfo(); | ||||
Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); | Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); | ||||
Register FramePtrReg = FuncInfo->getFrameOffsetReg(); | Register FramePtrReg = FuncInfo->getFrameOffsetReg(); | ||||
Register BasePtrReg = | Register BasePtrReg = | ||||
TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register(); | TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register(); | ||||
LivePhysRegs LiveRegs; | LivePhysRegs LiveRegs; | ||||
MachineBasicBlock::iterator MBBI = MBB.begin(); | MachineBasicBlock::iterator MBBI = MBB.begin(); | ||||
// DebugLoc must be unknown since the first instruction with DebugLoc is used | // DebugLoc must be unknown since the first instruction with DebugLoc is used | ||||
// to determine the end of the prologue. | // to determine the end of the prologue. | ||||
DebugLoc DL; | DebugLoc DL; | ||||
bool HasFP = false; | bool HasFP = false; | ||||
bool HasBP = false; | bool HasBP = false; | ||||
uint32_t NumBytes = MFI.getStackSize(); | uint32_t NumBytes = MFI.getStackSize(); | ||||
uint32_t RoundedSize = NumBytes; | uint32_t RoundedSize = NumBytes; | ||||
// To avoid clobbering VGPRs in lanes that weren't active on function entry, | // To avoid clobbering VGPRs in lanes that weren't active on function entry, | ||||
// turn on all lanes before doing the spill to memory. | // turn on all lanes before doing the spill to memory. | ||||
Register ScratchExecCopy; | Register ScratchExecCopy; | ||||
Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex; | |||||
Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex; | |||||
// Spill Whole-Wave Mode VGPRs. | // Spill Whole-Wave Mode VGPRs. | ||||
for (const auto &Reg : FuncInfo->getWWMSpills()) { | for (const auto &Reg : FuncInfo->getWWMSpills()) { | ||||
Register VGPR = Reg.first; | Register VGPR = Reg.first; | ||||
int FI = Reg.second; | int FI = Reg.second; | ||||
if (!ScratchExecCopy) | if (!ScratchExecCopy) | ||||
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, | ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, | ||||
/*IsProlog*/ true); | /*IsProlog*/ true); | ||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, FI); | buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, FI); | ||||
} | } | ||||
if (ScratchExecCopy) { | if (ScratchExecCopy) { | ||||
// FIXME: Split block and make terminator. | // FIXME: Split block and make terminator. | ||||
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; | unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; | ||||
MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; | MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; | ||||
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) | BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) | ||||
.addReg(ScratchExecCopy, RegState::Kill); | .addReg(ScratchExecCopy, RegState::Kill); | ||||
LiveRegs.addReg(ScratchExecCopy); | LiveRegs.addReg(ScratchExecCopy); | ||||
} | } | ||||
auto SaveSGPRToMemory = [&](Register Reg, const int FI) { | for (const auto &Spill : FuncInfo->getCustomSGPRSpills()) { | ||||
assert(!MFI.isDeadObjectIndex(FI)); | CustomSGPRSpillBuilder CSB(Spill.first, Spill.second, MBB, MBBI, DL, TII, | ||||
TRI, LiveRegs); | |||||
initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); | CSB.save(); | ||||
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( | |||||
MRI, LiveRegs, AMDGPU::VGPR_32RegClass); | |||||
if (!TmpVGPR) | |||||
report_fatal_error("failed to find free scratch register"); | |||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) | |||||
.addReg(Reg); | |||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, | |||||
FI); | |||||
}; | |||||
auto SaveSGPRToVGPRLane = [&](Register Reg, const int FI) { | |||||
assert(!MFI.isDeadObjectIndex(FI)); | |||||
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); | |||||
ArrayRef<SIRegisterInfo::SpilledReg> Spill = | |||||
FuncInfo->getSGPRToVGPRCustomSpills(FI); | |||||
assert(Spill.size() == 1); | |||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) | |||||
.addReg(Reg) | |||||
.addImm(Spill[0].Lane) | |||||
.addReg(Spill[0].VGPR, RegState::Undef); | |||||
}; | |||||
if (FPSaveIndex) { | |||||
if (spilledToMemory(MF, *FPSaveIndex)) | |||||
SaveSGPRToMemory(FramePtrReg, *FPSaveIndex); | |||||
else | |||||
SaveSGPRToVGPRLane(FramePtrReg, *FPSaveIndex); | |||||
} | |||||
// Emit the copy if we need an FP, and are using a free SGPR to save it. | |||||
if (FuncInfo->SGPRForFPSaveRestoreCopy) { | |||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), | |||||
FuncInfo->SGPRForFPSaveRestoreCopy) | |||||
.addReg(FramePtrReg) | |||||
.setMIFlag(MachineInstr::FrameSetup); | |||||
} | } | ||||
if (BPSaveIndex) { | // If a copy to scratch SGPR has been chosen for any of the custom SGPR | ||||
if (spilledToMemory(MF, *BPSaveIndex)) | // spills, make such scratch registers live throughout the function. | ||||
SaveSGPRToMemory(BasePtrReg, *BPSaveIndex); | SmallVector<Register, 1> ScratchSGPRs; | ||||
else | FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs); | ||||
SaveSGPRToVGPRLane(BasePtrReg, *BPSaveIndex); | if (!ScratchSGPRs.empty()) { | ||||
} | |||||
// Emit the copy if we need a BP, and are using a free SGPR to save it. | |||||
if (FuncInfo->SGPRForBPSaveRestoreCopy) { | |||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), | |||||
FuncInfo->SGPRForBPSaveRestoreCopy) | |||||
.addReg(BasePtrReg) | |||||
.setMIFlag(MachineInstr::FrameSetup); | |||||
} | |||||
// If a copy has been emitted for FP and/or BP, make the SGPRs | |||||
// used in the copy instructions live throughout the function. | |||||
SmallVector<MCPhysReg, 2> TempSGPRs; | |||||
if (FuncInfo->SGPRForFPSaveRestoreCopy) | |||||
TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy); | |||||
if (FuncInfo->SGPRForBPSaveRestoreCopy) | |||||
TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy); | |||||
if (!TempSGPRs.empty()) { | |||||
for (MachineBasicBlock &MBB : MF) { | for (MachineBasicBlock &MBB : MF) { | ||||
for (MCPhysReg Reg : TempSGPRs) | for (MCPhysReg Reg : ScratchSGPRs) | ||||
MBB.addLiveIn(Reg); | MBB.addLiveIn(Reg); | ||||
MBB.sortUniqueLiveIns(); | MBB.sortUniqueLiveIns(); | ||||
} | } | ||||
if (!LiveRegs.empty()) { | if (!LiveRegs.empty()) { | ||||
LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy); | for (MCPhysReg Reg : ScratchSGPRs) | ||||
LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy); | LiveRegs.addReg(Reg); | ||||
} | } | ||||
} | } | ||||
if (TRI.hasStackRealignment(MF)) { | if (TRI.hasStackRealignment(MF)) { | ||||
HasFP = true; | HasFP = true; | ||||
const unsigned Alignment = MFI.getMaxAlign().value(); | const unsigned Alignment = MFI.getMaxAlign().value(); | ||||
RoundedSize += Alignment; | RoundedSize += Alignment; | ||||
Show All 33 Lines | void SIFrameLowering::emitPrologue(MachineFunction &MF, | ||||
if (HasFP && RoundedSize != 0) { | if (HasFP && RoundedSize != 0) { | ||||
auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) | auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) | ||||
.addReg(StackPtrReg) | .addReg(StackPtrReg) | ||||
.addImm(RoundedSize * getScratchScaleFactor(ST)) | .addImm(RoundedSize * getScratchScaleFactor(ST)) | ||||
.setMIFlag(MachineInstr::FrameSetup); | .setMIFlag(MachineInstr::FrameSetup); | ||||
Add->getOperand(3).setIsDead(); // Mark SCC as dead. | Add->getOperand(3).setIsDead(); // Mark SCC as dead. | ||||
} | } | ||||
assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy || | bool FPSaved = FuncInfo->hasCustomSGPRSpillEntry(FramePtrReg); | ||||
FuncInfo->FramePointerSaveIndex)) && | assert((!HasFP || FPSaved) && | ||||
"Needed to save FP but didn't save it anywhere"); | "Needed to save FP but didn't save it anywhere"); | ||||
// If we allow spilling to AGPRs we may have saved FP but then spill | // If we allow spilling to AGPRs we may have saved FP but then spill | ||||
// everything into AGPRs instead of the stack. | // everything into AGPRs instead of the stack. | ||||
assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy && | assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) && | ||||
!FuncInfo->FramePointerSaveIndex) || | |||||
EnableSpillVGPRToAGPR) && | |||||
"Saved FP but didn't need it"); | "Saved FP but didn't need it"); | ||||
assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy || | bool BPSaved = FuncInfo->hasCustomSGPRSpillEntry(BasePtrReg); | ||||
FuncInfo->BasePointerSaveIndex)) && | assert((!HasBP || BPSaved) && | ||||
"Needed to save BP but didn't save it anywhere"); | "Needed to save BP but didn't save it anywhere"); | ||||
assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy && | assert((HasBP || !BPSaved) && "Saved BP but didn't need it"); | ||||
!FuncInfo->BasePointerSaveIndex)) && | |||||
"Saved BP but didn't need it"); | |||||
} | } | ||||
void SIFrameLowering::emitEpilogue(MachineFunction &MF, | void SIFrameLowering::emitEpilogue(MachineFunction &MF, | ||||
MachineBasicBlock &MBB) const { | MachineBasicBlock &MBB) const { | ||||
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); | const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); | ||||
if (FuncInfo->isEntryFunction()) | if (FuncInfo->isEntryFunction()) | ||||
return; | return; | ||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | ||||
const SIInstrInfo *TII = ST.getInstrInfo(); | const SIInstrInfo *TII = ST.getInstrInfo(); | ||||
MachineRegisterInfo &MRI = MF.getRegInfo(); | |||||
const SIRegisterInfo &TRI = TII->getRegisterInfo(); | const SIRegisterInfo &TRI = TII->getRegisterInfo(); | ||||
LivePhysRegs LiveRegs; | LivePhysRegs LiveRegs; | ||||
// Get the insert location for the epilogue. If there were no terminators in | // Get the insert location for the epilogue. If there were no terminators in | ||||
// the block, get the last instruction. | // the block, get the last instruction. | ||||
MachineBasicBlock::iterator MBBI = MBB.end(); | MachineBasicBlock::iterator MBBI = MBB.end(); | ||||
DebugLoc DL; | DebugLoc DL; | ||||
if (!MBB.empty()) { | if (!MBB.empty()) { | ||||
MBBI = MBB.getLastNonDebugInstr(); | MBBI = MBB.getLastNonDebugInstr(); | ||||
if (MBBI != MBB.end()) | if (MBBI != MBB.end()) | ||||
DL = MBBI->getDebugLoc(); | DL = MBBI->getDebugLoc(); | ||||
MBBI = MBB.getFirstTerminator(); | MBBI = MBB.getFirstTerminator(); | ||||
} | } | ||||
const MachineFrameInfo &MFI = MF.getFrameInfo(); | const MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
uint32_t NumBytes = MFI.getStackSize(); | uint32_t NumBytes = MFI.getStackSize(); | ||||
uint32_t RoundedSize = FuncInfo->isStackRealigned() | uint32_t RoundedSize = FuncInfo->isStackRealigned() | ||||
? NumBytes + MFI.getMaxAlign().value() | ? NumBytes + MFI.getMaxAlign().value() | ||||
: NumBytes; | : NumBytes; | ||||
const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); | const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); | ||||
const Register FramePtrReg = FuncInfo->getFrameOffsetReg(); | |||||
const Register BasePtrReg = | |||||
TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register(); | |||||
Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex; | |||||
Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex; | |||||
if (RoundedSize != 0 && hasFP(MF)) { | if (RoundedSize != 0 && hasFP(MF)) { | ||||
auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) | auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) | ||||
.addReg(StackPtrReg) | .addReg(StackPtrReg) | ||||
.addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST))) | .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST))) | ||||
.setMIFlag(MachineInstr::FrameDestroy); | .setMIFlag(MachineInstr::FrameDestroy); | ||||
Add->getOperand(3).setIsDead(); // Mark SCC as dead. | Add->getOperand(3).setIsDead(); // Mark SCC as dead. | ||||
} | } | ||||
if (FuncInfo->SGPRForFPSaveRestoreCopy) { | for (const auto &Spill : FuncInfo->getCustomSGPRSpills()) { | ||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) | CustomSGPRSpillBuilder CSB(Spill.first, Spill.second, MBB, MBBI, DL, TII, | ||||
.addReg(FuncInfo->SGPRForFPSaveRestoreCopy) | TRI, LiveRegs); | ||||
.setMIFlag(MachineInstr::FrameDestroy); | CSB.restore(); | ||||
} | |||||
if (FuncInfo->SGPRForBPSaveRestoreCopy) { | |||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) | |||||
.addReg(FuncInfo->SGPRForBPSaveRestoreCopy) | |||||
.setMIFlag(MachineInstr::FrameDestroy); | |||||
} | |||||
auto RestoreSGPRFromMemory = [&](Register Reg, const int FI) { | |||||
initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); | |||||
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( | |||||
MRI, LiveRegs, AMDGPU::VGPR_32RegClass); | |||||
if (!TmpVGPR) | |||||
report_fatal_error("failed to find free scratch register"); | |||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, | |||||
FI); | |||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), Reg) | |||||
.addReg(TmpVGPR, RegState::Kill); | |||||
}; | |||||
auto RestoreSGPRFromVGPRLane = [&](Register Reg, const int FI) { | |||||
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); | |||||
ArrayRef<SIRegisterInfo::SpilledReg> Spill = | |||||
FuncInfo->getSGPRToVGPRCustomSpills(FI); | |||||
assert(Spill.size() == 1); | |||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), Reg) | |||||
.addReg(Spill[0].VGPR) | |||||
.addImm(Spill[0].Lane); | |||||
}; | |||||
if (FPSaveIndex) { | |||||
const int FramePtrFI = *FPSaveIndex; | |||||
assert(!MFI.isDeadObjectIndex(FramePtrFI)); | |||||
if (spilledToMemory(MF, FramePtrFI)) | |||||
RestoreSGPRFromMemory(FramePtrReg, FramePtrFI); | |||||
else | |||||
RestoreSGPRFromVGPRLane(FramePtrReg, FramePtrFI); | |||||
} | |||||
if (BPSaveIndex) { | |||||
const int BasePtrFI = *BPSaveIndex; | |||||
assert(!MFI.isDeadObjectIndex(BasePtrFI)); | |||||
if (spilledToMemory(MF, BasePtrFI)) | |||||
RestoreSGPRFromMemory(BasePtrReg, BasePtrFI); | |||||
else | |||||
RestoreSGPRFromVGPRLane(BasePtrReg, BasePtrFI); | |||||
} | } | ||||
Register ScratchExecCopy; | Register ScratchExecCopy; | ||||
for (const auto &Reg : FuncInfo->getWWMSpills()) { | for (const auto &Reg : FuncInfo->getWWMSpills()) { | ||||
Register VGPR = Reg.first; | Register VGPR = Reg.first; | ||||
int FI = Reg.second; | int FI = Reg.second; | ||||
if (!ScratchExecCopy) | if (!ScratchExecCopy) | ||||
ScratchExecCopy = | ScratchExecCopy = | ||||
Show All 15 Lines | |||||
#ifndef NDEBUG | #ifndef NDEBUG | ||||
// Walks every frame object index in [getObjectIndexBegin, getObjectIndexEnd)
// and returns false as soon as it finds an object that is not dead, carries
// the SGPRSpill stack ID, and is not one of the exempted slots. Returns true
// when no such object exists.
//
// The two diff columns differ only in the exemption: the left column exempts
// the FramePointerSaveIndex / BasePointerSaveIndex slots, the right column
// exempts any index for which checkIndexInCustomSGPRSpills(I) is true
// (presumably the custom SGPR spill slots -- confirm against
// SIMachineFunctionInfo).
static bool allSGPRSpillsAreDead(const MachineFunction &MF) { | static bool allSGPRSpillsAreDead(const MachineFunction &MF) { | ||||
const MachineFrameInfo &MFI = MF.getFrameInfo(); | const MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); | const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); | ||||
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); | for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); | ||||
I != E; ++I) { | I != E; ++I) { | ||||
// A live SGPRSpill object that is not exempted means the answer is "no".
if (!MFI.isDeadObjectIndex(I) && | if (!MFI.isDeadObjectIndex(I) && | ||||
MFI.getStackID(I) == TargetStackID::SGPRSpill && | MFI.getStackID(I) == TargetStackID::SGPRSpill && | ||||
(I != FuncInfo->FramePointerSaveIndex && | !FuncInfo->checkIndexInCustomSGPRSpills(I)) { | ||||
I != FuncInfo->BasePointerSaveIndex)) { | |||||
return false; | return false; | ||||
} | } | ||||
} | } | ||||
return true; | return true; | ||||
} | } | ||||
#endif | #endif | ||||
▲ Show 20 Lines • Show All 141 Lines • ▼ Show 20 Lines | if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) < | ||||
// freezeReservedRegs() so that getReservedRegs() can reserve this newly | // freezeReservedRegs() so that getReservedRegs() can reserve this newly | ||||
// identified VGPR (for AGPR copy). | // identified VGPR (for AGPR copy). | ||||
FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR); | FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR); | ||||
MRI.freezeReservedRegs(MF); | MRI.freezeReservedRegs(MF); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
// Handles the special SGPR spills, such as the ones needed for FP, BP or any | |||||
// reserved registers, whose handling is delayed until frame lowering. | |||||
//
// Seeds a LivePhysRegs set with every register in the function's callee-saved
// list, then calls getVGPRSpillLaneOrTempRegister for the frame pointer (when
// an FP is needed now or is predicted to be needed) and for the base pointer
// (when TRI->hasBasePointer(MF) holds). SavedVGPRs is only read here, as one
// input to the frame-pointer prediction.
void SIFrameLowering::determineCustomSGPRSaves(MachineFunction &MF, | |||||
BitVector &SavedVGPRs) const { | |||||
MachineFrameInfo &FrameInfo = MF.getFrameInfo(); | |||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); | |||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | |||||
const SIRegisterInfo *TRI = ST.getRegisterInfo(); | |||||
LivePhysRegs LiveRegs; | |||||
LiveRegs.init(*TRI); | |||||
// Mark every callee-saved register live (the list is walked until its zero
// terminator). Presumably this keeps the helper below from choosing a
// callee-saved register as the temporary -- confirm against
// getVGPRSpillLaneOrTempRegister.
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); | |||||
for (unsigned I = 0; CSRegs[I]; ++I) | |||||
LiveRegs.addReg(CSRegs[I]); | |||||
// hasFP only knows about stack objects that already exist. We're now | |||||
// determining the stack slots that will be created, so we have to predict | |||||
// them. Stack objects force FP usage with calls. | |||||
// | |||||
// Note a new VGPR CSR may be introduced if one is used for the spill, but we | |||||
// don't want to report it here. | |||||
// | |||||
// FIXME: Is this really hasReservedCallFrame? | |||||
const bool WillHaveFP = | |||||
FrameInfo.hasCalls() && | |||||
(SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo)); | |||||
// The asserts below require that FP / BP do not already have a custom SGPR
// spill entry before one is requested for them.
if (WillHaveFP || hasFP(MF)) { | |||||
Register FramePtrReg = MFI->getFrameOffsetReg(); | |||||
assert(!MFI->hasCustomSGPRSpillEntry(FramePtrReg) && | |||||
"Re-reserving spill slot for FP"); | |||||
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, FramePtrReg); | |||||
} | |||||
if (TRI->hasBasePointer(MF)) { | |||||
Register BasePtrReg = TRI->getBaseRegister(); | |||||
assert(!MFI->hasCustomSGPRSpillEntry(BasePtrReg) && | |||||
"Re-reserving spill slot for BP"); | |||||
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, BasePtrReg); | |||||
} | |||||
} | |||||
// Only report VGPRs to generic code. | // Only report VGPRs to generic code. | ||||
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, | void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, | ||||
BitVector &SavedVGPRs, | BitVector &SavedVGPRs, | ||||
RegScavenger *RS) const { | RegScavenger *RS) const { | ||||
TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS); | TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS); | ||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); | ||||
if (MFI->isEntryFunction()) | if (MFI->isEntryFunction()) | ||||
return; | return; | ||||
MachineFrameInfo &FrameInfo = MF.getFrameInfo(); | |||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | ||||
const SIRegisterInfo *TRI = ST.getRegisterInfo(); | const SIRegisterInfo *TRI = ST.getRegisterInfo(); | ||||
for (MachineBasicBlock &MBB : MF) { | for (MachineBasicBlock &MBB : MF) { | ||||
for (MachineInstr &MI : MBB) { | for (MachineInstr &MI : MBB) { | ||||
// WRITELANE instructions used for SGPR spills can overwrite the inactive | // WRITELANE instructions used for SGPR spills can overwrite the inactive | ||||
// lanes of VGPRs and callee must spill and restore them even if they are | // lanes of VGPRs and callee must spill and restore them even if they are | ||||
// marked Caller-saved. | // marked Caller-saved. | ||||
Show All 17 Lines | void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, | ||||
SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask()); | SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask()); | ||||
// Do not save AGPRs prior to GFX90A because there was no easy way to do so. | // Do not save AGPRs prior to GFX90A because there was no easy way to do so. | ||||
// In gfx908 there were no AGPR loads and stores, and thus spilling also | // In gfx908 there were no AGPR loads and stores, and thus spilling also | ||||
// requires a temporary VGPR. | // requires a temporary VGPR. | ||||
if (!ST.hasGFX90AInsts()) | if (!ST.hasGFX90AInsts()) | ||||
SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask()); | SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask()); | ||||
// hasFP only knows about stack objects that already exist. We're now | determineCustomSGPRSaves(MF, SavedVGPRs); | ||||
// determining the stack slots that will be created, so we have to predict | |||||
// them. Stack objects force FP usage with calls. | |||||
// | |||||
// Note a new VGPR CSR may be introduced if one is used for the spill, but we | |||||
// don't want to report it here. | |||||
// | |||||
// FIXME: Is this really hasReservedCallFrame? | |||||
const bool WillHaveFP = | |||||
FrameInfo.hasCalls() && | |||||
(SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo)); | |||||
// The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't | // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't | ||||
// allow the default insertion to handle them. | // allow the default insertion to handle them. | ||||
for (auto &Reg : MFI->getWWMSpills()) | for (auto &Reg : MFI->getWWMSpills()) | ||||
SavedVGPRs.reset(Reg.first); | SavedVGPRs.reset(Reg.first); | ||||
LivePhysRegs LiveRegs; | |||||
LiveRegs.init(*TRI); | |||||
if (WillHaveFP || hasFP(MF)) { | |||||
assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex && | |||||
"Re-reserving spill slot for FP"); | |||||
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy, | |||||
MFI->FramePointerSaveIndex, true); | |||||
} | |||||
if (TRI->hasBasePointer(MF)) { | |||||
if (MFI->SGPRForFPSaveRestoreCopy) | |||||
LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy); | |||||
assert(!MFI->SGPRForBPSaveRestoreCopy && | |||||
!MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP"); | |||||
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy, | |||||
MFI->BasePointerSaveIndex, false); | |||||
} | |||||
} | } | ||||
void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, | void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, | ||||
BitVector &SavedRegs, | BitVector &SavedRegs, | ||||
RegScavenger *RS) const { | RegScavenger *RS) const { | ||||
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); | ||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); | ||||
if (MFI->isEntryFunction()) | if (MFI->isEntryFunction()) | ||||
Show All 38 Lines | |||||
// Redirects the CalleeSavedInfo entries for the frame pointer and the base
// pointer to the scratch SGPRs that hold their copies, by calling setDstReg
// on the matching entries of CSI.
//
// Returns true only when CSI is empty; every other path returns false, with
// or without having modified CSI. How the caller interprets the result is not
// visible in this excerpt. The TRI parameter is not referenced in the lines
// shown; the register info is taken from the subtarget instead.
//
// The two diff columns differ in where the copy registers come from: the left
// column reads FuncInfo->SGPRForFPSaveRestoreCopy / SGPRForBPSaveRestoreCopy
// directly, the right column looks them up through
// FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg / BasePtrReg).
bool SIFrameLowering::assignCalleeSavedSpillSlots( | bool SIFrameLowering::assignCalleeSavedSpillSlots( | ||||
MachineFunction &MF, const TargetRegisterInfo *TRI, | MachineFunction &MF, const TargetRegisterInfo *TRI, | ||||
std::vector<CalleeSavedInfo> &CSI) const { | std::vector<CalleeSavedInfo> &CSI) const { | ||||
if (CSI.empty()) | if (CSI.empty()) | ||||
return true; // Early exit if no callee saved registers are modified! | return true; // Early exit if no callee saved registers are modified! | ||||
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); | const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); | ||||
if (!FuncInfo->SGPRForFPSaveRestoreCopy && | |||||
!FuncInfo->SGPRForBPSaveRestoreCopy) | |||||
return false; | |||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | ||||
const SIRegisterInfo *RI = ST.getRegisterInfo(); | const SIRegisterInfo *RI = ST.getRegisterInfo(); | ||||
Register FramePtrReg = FuncInfo->getFrameOffsetReg(); | Register FramePtrReg = FuncInfo->getFrameOffsetReg(); | ||||
Register BasePtrReg = RI->getBaseRegister(); | Register BasePtrReg = RI->getBaseRegister(); | ||||
Register SGPRForFPSaveRestoreCopy = | |||||
FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg); | |||||
Register SGPRForBPSaveRestoreCopy = | |||||
FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg); | |||||
// Nothing to redirect when neither FP nor BP has a copy register.
if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy) | |||||
return false; | |||||
// Number of CSI entries (at most two: FP and BP) that still need setDstReg.
unsigned NumModifiedRegs = 0; | unsigned NumModifiedRegs = 0; | ||||
if (FuncInfo->SGPRForFPSaveRestoreCopy) | if (SGPRForFPSaveRestoreCopy) | ||||
NumModifiedRegs++; | NumModifiedRegs++; | ||||
if (FuncInfo->SGPRForBPSaveRestoreCopy) | if (SGPRForBPSaveRestoreCopy) | ||||
NumModifiedRegs++; | NumModifiedRegs++; | ||||
// NOTE(review): `if (--NumModifiedRegs) break;` leaves the loop while the
// counter is still non-zero. With both copy registers set (counter == 2) the
// loop exits right after the first match, so the other entry never receives
// its DstReg; with only one set, the loop keeps scanning after the match.
// The intended test looks like `--NumModifiedRegs == 0` -- confirm, and fix
// in both columns if so.
for (auto &CS : CSI) { | for (auto &CS : CSI) { | ||||
if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) { | if (CS.getReg() == FramePtrReg && SGPRForFPSaveRestoreCopy) { | ||||
CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy); | CS.setDstReg(SGPRForFPSaveRestoreCopy); | ||||
if (--NumModifiedRegs) | if (--NumModifiedRegs) | ||||
break; | break; | ||||
} else if (CS.getReg() == BasePtrReg && | } else if (CS.getReg() == BasePtrReg && SGPRForBPSaveRestoreCopy) { | ||||
FuncInfo->SGPRForBPSaveRestoreCopy) { | CS.setDstReg(SGPRForBPSaveRestoreCopy); | ||||
CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy); | |||||
if (--NumModifiedRegs) | if (--NumModifiedRegs) | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
return false; | return false; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 121 Lines • Show Last 20 Lines |
This function doesn’t do anything with CSRs, although the name says so?