diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -777,7 +777,7 @@ Optional FPSaveIndex = FuncInfo->FramePointerSaveIndex; Optional BPSaveIndex = FuncInfo->BasePointerSaveIndex; - // VGPRs used for SGPR->VGPR spills + // Spill Whole-Wave Mode VGPRs. for (const auto &Reg : FuncInfo->getWWMVGPRs()) { Register VGPR = Reg.first; Optional FI = Reg.second; @@ -792,15 +792,6 @@ *FI); } - for (auto ReservedWWM : FuncInfo->wwmAllocation()) { - if (!ScratchExecCopy) - ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, - /*IsProlog*/ true); - - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - std::get<0>(ReservedWWM), std::get<1>(ReservedWWM)); - } - if (ScratchExecCopy) { // FIXME: Split block and make terminator. unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; @@ -1061,15 +1052,6 @@ *FI); } - for (auto ReservedWWM : FuncInfo->wwmAllocation()) { - if (!ScratchExecCopy) - ScratchExecCopy = - buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); - - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - std::get<0>(ReservedWWM), std::get<1>(ReservedWWM)); - } - if (ScratchExecCopy) { // FIXME: Split block and make terminator. unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; @@ -1117,11 +1099,6 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *FuncInfo = MF.getInfo(); - if (!FuncInfo->isEntryFunction()) { - // Spill VGPRs used for Whole Wave Mode - FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI); - } - const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() && EnableSpillVGPRToAGPR; @@ -1269,6 +1246,13 @@ } } + // Add WWM reserved VGPRs. 
+ for (Register Reg : MFI->getWWMReservedRegs()) { + const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); + MFI->addToWWMVGPRs(MF, Reg, TRI->getSpillSize(*RC), + TRI->getSpillAlign(*RC)); + } + for (MachineBasicBlock &MBB : MF) { for (auto &Reg : MFI->getWWMVGPRs()) MBB.addLiveIn(Reg.first); @@ -1297,8 +1281,8 @@ FrameInfo.hasCalls() && (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo)); - // VGPRs used for SGPR spilling need to be specially inserted in the prolog, - // so don't allow the default insertion to handle them. + // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't + // allow the default insertion to handle them. for (auto &Reg : MFI->getWWMVGPRs()) SavedVGPRs.reset(Reg.first); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -458,24 +458,6 @@ bool IsDead = false; }; - // Track VGPRs reserved for WWM. - SmallSetVector WWMReservedRegs; - - /// Track stack slots used for save/restore of reserved WWM VGPRs in the - /// prolog/epilog. - - /// FIXME: This is temporary state only needed in PrologEpilogInserter, and - /// doesn't really belong here. It does not require serialization - SmallVector WWMReservedFrameIndexes; - - void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI, - const SIRegisterInfo &TRI); - - auto wwmAllocation() const { - assert(WWMReservedRegs.size() == WWMReservedFrameIndexes.size()); - return zip(WWMReservedRegs, WWMReservedFrameIndexes); - } - private: // Track VGPR + wave index for each subregister of the SGPR spilled to // frameindex key. @@ -491,6 +473,13 @@ // the VGPR and its stack slot index. WWMVGPRsMap WWMVGPRs; + using ReservedRegSet = SmallSetVector; + // To track the VGPRs reserved for WWM instructions. 
They get stack slots + // later during PrologEpilogInserter and get added into the superset WWMVGPRs + // for actual spilling. Keeping them in a separate set simplifies both + // register reservation and serialization. + ReservedRegSet WWMReservedRegs; + DenseMap VGPRToAGPRSpills; // AGPRs used for VGPR spills. @@ -542,9 +531,7 @@ PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange); - void reserveWWMRegister(Register Reg) { - WWMReservedRegs.insert(Reg); - } + void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); } ArrayRef getSGPRToVGPRSpills(int FrameIndex) const { @@ -556,6 +543,7 @@ ArrayRef getSGPRSpillVGPRs() const { return SpillVGPRs; } const WWMVGPRsMap &getWWMVGPRs() const { return WWMVGPRs; } + const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; } void addToWWMVGPRs(MachineFunction &MF, Register VGPR, uint64_t Size = 4, Align Alignment = Align(4)); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -463,20 +463,6 @@ return HaveSGPRToMemory; } -void SIMachineFunctionInfo::allocateWWMReservedSpillSlots( - MachineFrameInfo &MFI, const SIRegisterInfo &TRI) { - assert(WWMReservedFrameIndexes.empty()); - - WWMReservedFrameIndexes.resize(WWMReservedRegs.size()); - - int I = 0; - for (Register VGPR : WWMReservedRegs) { - const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR); - WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject( - TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC)); - } -} - int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI) { if (ScavengeFI) @@ -602,7 +588,7 @@ BytesInStackArgArea(MFI.getBytesInStackArgArea()), ReturnsVoid(MFI.returnsVoid()), ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) { - for (Register Reg : MFI.WWMReservedRegs) + for (Register Reg 
: MFI.getWWMReservedRegs()) WWMReservedRegs.push_back(regToString(Reg, TRI)); if (MFI.getVGPRForAGPRCopy()) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -701,7 +701,7 @@ reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy()); } - for (Register Reg : MFI->WWMReservedRegs) + for (Register Reg : MFI->getWWMReservedRegs()) reserveRegisterTuples(Reserved, Reg); // FIXME: Stop using reserved registers for this.