diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -792,15 +792,6 @@ *FI); } - for (auto ReservedWWM : FuncInfo->wwmAllocation()) { - if (!ScratchExecCopy) - ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, - /*IsProlog*/ true); - - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - std::get<0>(ReservedWWM), std::get<1>(ReservedWWM)); - } - if (ScratchExecCopy) { // FIXME: Split block and make terminator. unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; @@ -1061,15 +1052,6 @@ *FI); } - for (auto ReservedWWM : FuncInfo->wwmAllocation()) { - if (!ScratchExecCopy) - ScratchExecCopy = - buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); - - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - std::get<0>(ReservedWWM), std::get<1>(ReservedWWM)); - } - if (ScratchExecCopy) { // FIXME: Split block and make terminator. unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; @@ -1117,11 +1099,6 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *FuncInfo = MF.getInfo(); - if (!FuncInfo->isEntryFunction()) { - // Spill VGPRs used for Whole Wave Mode - FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI); - } - const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() && EnableSpillVGPRToAGPR; @@ -1264,6 +1241,13 @@ } } + // Add WWM reserved VGPRs to LaneVGPRs with their class spill size/align. 
+ for (Register Reg : MFI->getWWMReservedRegs()) { + const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); + MFI->addToLaneVGPRs(MF, Reg, TRI->getSpillSize(*RC), + TRI->getSpillAlign(*RC)); + } + for (MachineBasicBlock &MBB : MF) { for (auto &Reg : MFI->getLaneVGPRs()) MBB.addLiveIn(Reg.first); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -479,24 +479,6 @@ bool IsDead = false; }; - // Track VGPRs reserved for WWM. - SmallSetVector WWMReservedRegs; - - /// Track stack slots used for save/restore of reserved WWM VGPRs in the - /// prolog/epilog. - - /// FIXME: This is temporary state only needed in PrologEpilogInserter, and - /// doesn't really belong here. It does not require serialization - SmallVector WWMReservedFrameIndexes; - - void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI, - const SIRegisterInfo &TRI); - - auto wwmAllocation() const { - assert(WWMReservedRegs.size() == WWMReservedFrameIndexes.size()); - return zip(WWMReservedRegs, WWMReservedFrameIndexes); - } - private: // Track VGPR + wave index for each subregister of the SGPR spilled to // frameindex key. @@ -506,6 +488,10 @@ using LaneVGPRsMap = MapVector>; LaneVGPRsMap LaneVGPRs; + using ReservedRegSet = SmallSetVector; + // Track VGPRs reserved for WWM (populated via reserveWWMRegister()). + ReservedRegSet WWMReservedRegs; + DenseMap VGPRToAGPRSpills; // AGPRs used for VGPR spills. 
@@ -551,9 +537,7 @@ PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange); - void reserveWWMRegister(Register Reg) { - WWMReservedRegs.insert(Reg); - } + void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); } ArrayRef getSGPRToVGPRSpills(int FrameIndex) const { auto I = SGPRToVGPRSpills.find(FrameIndex); @@ -563,6 +547,7 @@ ArrayRef getSGPRSpillVGPRs() const { return SpillVGPRs; } const LaneVGPRsMap &getLaneVGPRs() const { return LaneVGPRs; } + const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; } void addToLaneVGPRs(MachineFunction &MF, Register VGPR, uint64_t Size = 4, Align Alignment = Align(4)); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -456,20 +456,6 @@ return HaveSGPRToMemory; } -void SIMachineFunctionInfo::allocateWWMReservedSpillSlots( - MachineFrameInfo &MFI, const SIRegisterInfo &TRI) { - assert(WWMReservedFrameIndexes.empty()); - - WWMReservedFrameIndexes.resize(WWMReservedRegs.size()); - - int I = 0; - for (Register VGPR : WWMReservedRegs) { - const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR); - WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject( - TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC)); - } -} - int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI) { if (ScavengeFI) @@ -595,7 +581,7 @@ BytesInStackArgArea(MFI.getBytesInStackArgArea()), ReturnsVoid(MFI.returnsVoid()), ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) { - for (Register Reg : MFI.WWMReservedRegs) + for (Register Reg : MFI.getWWMReservedRegs()) WWMReservedRegs.push_back(regToString(Reg, TRI)); if (MFI.getVGPRForAGPRCopy()) diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp --- 
a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -148,6 +148,7 @@ const Register PhysReg = VRM->getPhys(Reg); assert(PhysReg != 0); + // To be spilled/restored in the prologue/epilogue. MFI->reserveWWMRegister(PhysReg); } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -701,7 +701,7 @@ reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy()); } - for (Register Reg : MFI->WWMReservedRegs) + for (Register Reg : MFI->getWWMReservedRegs()) reserveRegisterTuples(Reserved, Reg); // FIXME: Stop using reserved registers for this.