diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -33,6 +33,8 @@ RegScavenger *RS = nullptr) const override; void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const; + void determineCustomSGPRSaves(MachineFunction &MF, + BitVector &SavedRegs) const; bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -26,6 +26,18 @@ cl::ReallyHidden, cl::init(true)); +// Find a register matching \p RC from \p LiveRegs which is unused and available +// throughout the function. On failure, returns AMDGPU::NoRegister. +static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, + const LivePhysRegs &LiveRegs, + const TargetRegisterClass &RC) { + for (MCRegister Reg : RC) { + if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg)) + return Reg; + } + return MCRegister(); +} + // Find a scratch register that we can use in the prologue. We avoid using // callee-save registers since they may appear to be free when this is called // from canUseAsPrologue (during shrink wrapping), but then no longer be free @@ -39,65 +51,68 @@ for (unsigned i = 0; CSRegs[i]; ++i) LiveRegs.addReg(CSRegs[i]); - if (Unused) { - // We are looking for a register that can be used throughout the entire - // function, so any use is unacceptable. - for (MCRegister Reg : RC) { - if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg)) - return Reg; - } - } else { - for (MCRegister Reg : RC) { - if (LiveRegs.available(MRI, Reg)) - return Reg; - } + // We are looking for a register that can be used throughout the entire + // function, so any use is unacceptable. 
+ if (Unused) + return findUnusedRegister(MRI, LiveRegs, RC); + + for (MCRegister Reg : RC) { + if (LiveRegs.available(MRI, Reg)) + return Reg; } return MCRegister(); } -static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, - LivePhysRegs &LiveRegs, - Register &TempSGPR, - Optional &FrameIndex, - bool IsFP) { +static void getVGPRSpillLaneOrTempRegister( + MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR, + const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass) { SIMachineFunctionInfo *MFI = MF.getInfo(); MachineFrameInfo &FrameInfo = MF.getFrameInfo(); const GCNSubtarget &ST = MF.getSubtarget(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); + unsigned Size = TRI->getSpillSize(RC); + Align Alignment = TRI->getSpillAlign(RC); - // We need to save and restore the current FP/BP. + // We need to save and restore the given SGPR. - // 1: Try to save the FP/BP in an unused SGPR. - TempSGPR = findScratchNonCalleeSaveRegister( - MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true); + // 1: Try to save the given register into an unused scratch SGPR. The LiveRegs + // should have all the callee saved registers marked as used. + Register ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC); - if (!TempSGPR) { - int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr, - TargetStackID::SGPRSpill); + if (!ScratchSGPR) { + int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr, + TargetStackID::SGPRSpill); if (TRI->spillSGPRToVGPR() && - MFI->allocateSGPRSpillToVGPR(MF, NewFI, /* IsPEI */ true)) { - // 2: There's no free lane to spill, and no free register to save FP/BP, - // so we're forced to spill another VGPR to use for the spill. - FrameIndex = NewFI; - - LLVM_DEBUG( - auto Spill = MFI->getSGPRToVGPRCustomSpills(NewFI).front(); - dbgs() << (IsFP ? 
"FP" : "BP") << " requires fallback spill to " - << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';); + MFI->allocateSGPRSpillToVGPR(MF, FI, /* IsPEI */ true)) { + // 2: There's no free lane to spill, and no free register to save the + // SGPR, so we're forced to take another VGPR to use for the spill. + MFI->addToCustomSGPRSpills( + SGPR, CustomSGPRSaveInfo(SGPRSaveKind::SPILL_TO_VGPR_LANE, FI)); + + LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRCustomSpills(FI).front(); + dbgs() << printReg(SGPR, TRI) << " requires fallback spill to " + << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane + << '\n';); } else { - // Remove dead index - MF.getFrameInfo().RemoveStackObject(NewFI); - // 3: If all else fails, spill the FP/BP to memory. - FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4)); - LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling " - << (IsFP ? "FP" : "BP") << '\n'); + // Remove dead index + MF.getFrameInfo().RemoveStackObject(FI); + // 3: If all else fails, spill the register to memory. + FI = FrameInfo.CreateSpillStackObject(Size, Alignment); + MFI->addToCustomSGPRSpills( + SGPR, CustomSGPRSaveInfo(SGPRSaveKind::SPILL_TO_MEM, FI)); + LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling " + << printReg(SGPR, TRI) << '\n'); } } else { - LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to " - << printReg(TempSGPR, TRI) << '\n'); + MFI->addToCustomSGPRSpills( + SGPR, + CustomSGPRSaveInfo(SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR)); + LiveRegs.addReg(ScratchSGPR); + LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to " + << printReg(ScratchSGPR, TRI) << '\n'); } } @@ -109,7 +124,7 @@ LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, - Register SpillReg, int FI) { + Register SpillReg, int FI, int64_t DwordOff = 0) { unsigned Opc = ST.enableFlatScratch() ? 
AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; @@ -121,19 +136,17 @@ LiveRegs.addReg(SpillReg); bool IsKill = !MBB.isLiveIn(SpillReg); TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, - FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, - &LiveRegs); + FuncInfo.getStackPtrOffsetReg(), DwordOff, MMO, + nullptr, &LiveRegs); if (IsKill) LiveRegs.removeReg(SpillReg); } -static void buildEpilogRestore(const GCNSubtarget &ST, - const SIRegisterInfo &TRI, - const SIMachineFunctionInfo &FuncInfo, - LivePhysRegs &LiveRegs, MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - const DebugLoc &DL, Register SpillReg, int FI) { +static void buildEpilogRestore( + const GCNSubtarget &ST, const SIRegisterInfo &TRI, + const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, + MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + const DebugLoc &DL, Register SpillReg, int FI, int64_t DwordOff = 0) { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; @@ -143,8 +156,8 @@ PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI), FrameInfo.getObjectAlign(FI)); TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, - FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, - &LiveRegs); + FuncInfo.getStackPtrOffsetReg(), DwordOff, MMO, + nullptr, &LiveRegs); } static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, @@ -172,6 +185,180 @@ .addReg(GitPtrLo); } +static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI, + const SIMachineFunctionInfo *FuncInfo, + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, bool IsProlog) { + if (LiveRegs.empty()) { + LiveRegs.init(TRI); + if (IsProlog) { + LiveRegs.addLiveIns(MBB); + } else { + // In epilog. 
+ LiveRegs.addLiveOuts(MBB); + LiveRegs.stepBackward(*MBBI); + } + } +} + +namespace llvm { + +// SpillBuilder for saving/restoring custom SGPR spills. +// Custom spills are those special SGPR spills delayed until the current +// function's frame is finalized. The spills for FP, BP, etc. come under this +// category. For a given register, the builder uses the CustomSGPRSaveInfo to +// decide the spill method. +class CustomSGPRSpillBuilder { + MachineBasicBlock::iterator MI; + MachineBasicBlock &MBB; + MachineFunction &MF; + const GCNSubtarget &ST; + MachineFrameInfo &MFI; + SIMachineFunctionInfo *FuncInfo; + const SIInstrInfo *TII; + const SIRegisterInfo &TRI; + Register SuperReg; + const CustomSGPRSaveInfo SI; + LivePhysRegs &LiveRegs; + const DebugLoc &DL; + ArrayRef SplitParts; + unsigned NumSubRegs; + unsigned EltSize = 4; + + void saveToMemory(const int FI) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); + assert(!MFI.isDeadObjectIndex(FI)); + + initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true); + + MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( + MRI, LiveRegs, AMDGPU::VGPR_32RegClass); + if (!TmpVGPR) + report_fatal_error("failed to find free scratch register"); + + for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) { + Register SubReg = NumSubRegs == 1 + ? SuperReg + : Register(TRI.getSubReg(SuperReg, SplitParts[I])); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) + .addReg(SubReg); + + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR, + FI, DwordOff); + DwordOff += 4; + } + } + + void saveToVGPRLane(const int FI) const { + assert(!MFI.isDeadObjectIndex(FI)); + + assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); + ArrayRef Spill = + FuncInfo->getSGPRToVGPRCustomSpills(FI); + assert(Spill.size() == NumSubRegs); + + for (unsigned I = 0; I < NumSubRegs; ++I) { + Register SubReg = NumSubRegs == 1 + ? 
SuperReg + : Register(TRI.getSubReg(SuperReg, SplitParts[I])); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[I].VGPR) + .addReg(SubReg) + .addImm(Spill[I].Lane) + .addReg(Spill[I].VGPR, RegState::Undef); + } + } + + void copyToScratchSGPR(Register DstReg) const { + BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg) + .addReg(SuperReg) + .setMIFlag(MachineInstr::FrameSetup); + } + + void restoreFromMemory(const int FI) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + + initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false); + MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( + MRI, LiveRegs, AMDGPU::VGPR_32RegClass); + if (!TmpVGPR) + report_fatal_error("failed to find free scratch register"); + + for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) { + Register SubReg = NumSubRegs == 1 + ? SuperReg + : Register(TRI.getSubReg(SuperReg, SplitParts[I])); + + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR, + FI, DwordOff); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) + .addReg(TmpVGPR, RegState::Kill); + DwordOff += 4; + } + } + + void restoreFromVGPRLane(const int FI) { + assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); + ArrayRef Spill = + FuncInfo->getSGPRToVGPRCustomSpills(FI); + assert(Spill.size() == NumSubRegs); + + for (unsigned I = 0; I < NumSubRegs; ++I) { + Register SubReg = NumSubRegs == 1 + ? 
SuperReg + : Register(TRI.getSubReg(SuperReg, SplitParts[I])); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) + .addReg(Spill[I].VGPR) + .addImm(Spill[I].Lane); + } + } + + void copyFromScratchSGPR(Register SrcReg) const { + BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg) + .addReg(SrcReg) + .setMIFlag(MachineInstr::FrameDestroy); + } + +public: + CustomSGPRSpillBuilder(Register Reg, const CustomSGPRSaveInfo SI, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const DebugLoc &DL, const SIInstrInfo *TII, + const SIRegisterInfo &TRI, LivePhysRegs &LiveRegs) + : MI(MI), MBB(MBB), MF(*MBB.getParent()), + ST(MF.getSubtarget()), MFI(MF.getFrameInfo()), + FuncInfo(MF.getInfo()), TII(TII), TRI(TRI), + SuperReg(Reg), SI(SI), LiveRegs(LiveRegs), DL(DL) { + const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg); + SplitParts = TRI.getRegSplitParts(RC, EltSize); + NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); + + assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); + } + + void save() { + switch (SI.getKind()) { + case SGPRSaveKind::SPILL_TO_MEM: + return saveToMemory(SI.getIndex()); + case SGPRSaveKind::SPILL_TO_VGPR_LANE: + return saveToVGPRLane(SI.getIndex()); + case SGPRSaveKind::COPY_TO_SCRATCH_SGPR: + return copyToScratchSGPR(SI.getReg()); + } + } + + void restore() { + switch (SI.getKind()) { + case SGPRSaveKind::SPILL_TO_MEM: + return restoreFromMemory(SI.getIndex()); + case SGPRSaveKind::SPILL_TO_VGPR_LANE: + return restoreFromVGPRLane(SI.getIndex()); + case SGPRSaveKind::COPY_TO_SCRATCH_SGPR: + return copyFromScratchSGPR(SI.getReg()); + } + } +}; + +} // namespace llvm + // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()` void SIFrameLowering::emitEntryFunctionFlatScratchInit( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, @@ -671,22 +858,6 @@ llvm_unreachable("Invalid TargetStackID::Value"); } -static void initLiveRegs(LivePhysRegs &LiveRegs, const 
SIRegisterInfo &TRI, - const SIMachineFunctionInfo *FuncInfo, - MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, bool IsProlog) { - if (LiveRegs.empty()) { - LiveRegs.init(TRI); - if (IsProlog) { - LiveRegs.addLiveIns(MBB); - } else { - // In epilog. - LiveRegs.addLiveOuts(MBB); - LiveRegs.stepBackward(*MBBI); - } - } -} - // Activate all lanes, returns saved exec. static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, MachineFunction &MF, @@ -718,13 +889,6 @@ return ScratchExecCopy; } -// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR. -// Otherwise we are spilling to memory. -static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) { - const MachineFrameInfo &MFI = MF.getFrameInfo(); - return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill; -} - void SIFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { SIMachineFunctionInfo *FuncInfo = MF.getInfo(); @@ -734,7 +898,6 @@ } MachineFrameInfo &MFI = MF.getFrameInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); @@ -758,9 +921,6 @@ // turn on all lanes before doing the spill to memory. Register ScratchExecCopy; - Optional FPSaveIndex = FuncInfo->FramePointerSaveIndex; - Optional BPSaveIndex = FuncInfo->BasePointerSaveIndex; - // Spill Whole-Wave Mode VGPRs. 
for (const auto &Reg : FuncInfo->getWWMSpills()) { Register VGPR = Reg.first; @@ -781,86 +941,26 @@ LiveRegs.addReg(ScratchExecCopy); } - auto SaveSGPRToMemory = [&](Register Reg, const int FI) { - assert(!MFI.isDeadObjectIndex(FI)); - - initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); - - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); - if (!TmpVGPR) - report_fatal_error("failed to find free scratch register"); - - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) - .addReg(Reg); - - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, - FI); - }; - - auto SaveSGPRToVGPRLane = [&](Register Reg, const int FI) { - assert(!MFI.isDeadObjectIndex(FI)); - - assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); - ArrayRef Spill = - FuncInfo->getSGPRToVGPRCustomSpills(FI); - assert(Spill.size() == 1); - - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) - .addReg(Reg) - .addImm(Spill[0].Lane) - .addReg(Spill[0].VGPR, RegState::Undef); - }; - - if (FPSaveIndex) { - if (spilledToMemory(MF, *FPSaveIndex)) - SaveSGPRToMemory(FramePtrReg, *FPSaveIndex); - else - SaveSGPRToVGPRLane(FramePtrReg, *FPSaveIndex); - } - - // Emit the copy if we need an FP, and are using a free SGPR to save it. - if (FuncInfo->SGPRForFPSaveRestoreCopy) { - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), - FuncInfo->SGPRForFPSaveRestoreCopy) - .addReg(FramePtrReg) - .setMIFlag(MachineInstr::FrameSetup); - } - - if (BPSaveIndex) { - if (spilledToMemory(MF, *BPSaveIndex)) - SaveSGPRToMemory(BasePtrReg, *BPSaveIndex); - else - SaveSGPRToVGPRLane(BasePtrReg, *BPSaveIndex); + for (const auto &Spill : FuncInfo->getCustomSGPRSpills()) { + CustomSGPRSpillBuilder CSB(Spill.first, Spill.second, MBB, MBBI, DL, TII, + TRI, LiveRegs); + CSB.save(); } - // Emit the copy if we need a BP, and are using a free SGPR to save it. 
- if (FuncInfo->SGPRForBPSaveRestoreCopy) { - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), - FuncInfo->SGPRForBPSaveRestoreCopy) - .addReg(BasePtrReg) - .setMIFlag(MachineInstr::FrameSetup); - } - - // If a copy has been emitted for FP and/or BP, Make the SGPRs - // used in the copy instructions live throughout the function. - SmallVector TempSGPRs; - if (FuncInfo->SGPRForFPSaveRestoreCopy) - TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy); - - if (FuncInfo->SGPRForBPSaveRestoreCopy) - TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy); - - if (!TempSGPRs.empty()) { + // If a copy to scratch SGPR has been chosen for any of the custom SGPR + // spills, make such scratch registers live throughout the function. + SmallVector ScratchSGPRs; + FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs); + if (!ScratchSGPRs.empty()) { for (MachineBasicBlock &MBB : MF) { - for (MCPhysReg Reg : TempSGPRs) + for (MCPhysReg Reg : ScratchSGPRs) MBB.addLiveIn(Reg); MBB.sortUniqueLiveIns(); } if (!LiveRegs.empty()) { - LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy); - LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy); + for (MCPhysReg Reg : ScratchSGPRs) + LiveRegs.addReg(Reg); } } @@ -910,24 +1010,20 @@ Add->getOperand(3).setIsDead(); // Mark SCC as dead. } - assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy || - FuncInfo->FramePointerSaveIndex)) && + bool FPSaved = FuncInfo->hasCustomSGPRSpillEntry(FramePtrReg); + assert((!HasFP || FPSaved) && "Needed to save FP but didn't save it anywhere"); // If we allow spilling to AGPRs we may have saved FP but then spill // everything into AGPRs instead of the stack. 
- assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy && - !FuncInfo->FramePointerSaveIndex) || - EnableSpillVGPRToAGPR) && + assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) && "Saved FP but didn't need it"); - assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy || - FuncInfo->BasePointerSaveIndex)) && + bool BPSaved = FuncInfo->hasCustomSGPRSpillEntry(BasePtrReg); + assert((!HasBP || BPSaved) && "Needed to save BP but didn't save it anywhere"); - assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy && - !FuncInfo->BasePointerSaveIndex)) && - "Saved BP but didn't need it"); + assert((HasBP || !BPSaved) && "Saved BP but didn't need it"); } void SIFrameLowering::emitEpilogue(MachineFunction &MF, @@ -938,7 +1034,6 @@ const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); LivePhysRegs LiveRegs; // Get the insert location for the epilogue. If there were no terminators in @@ -959,12 +1054,6 @@ ? NumBytes + MFI.getMaxAlign().value() : NumBytes; const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); - const Register FramePtrReg = FuncInfo->getFrameOffsetReg(); - const Register BasePtrReg = - TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register(); - - Optional FPSaveIndex = FuncInfo->FramePointerSaveIndex; - Optional BPSaveIndex = FuncInfo->BasePointerSaveIndex; if (RoundedSize != 0 && hasFP(MF)) { auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) @@ -974,56 +1063,10 @@ Add->getOperand(3).setIsDead(); // Mark SCC as dead. 
} - if (FuncInfo->SGPRForFPSaveRestoreCopy) { - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) - .addReg(FuncInfo->SGPRForFPSaveRestoreCopy) - .setMIFlag(MachineInstr::FrameDestroy); - } - - if (FuncInfo->SGPRForBPSaveRestoreCopy) { - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) - .addReg(FuncInfo->SGPRForBPSaveRestoreCopy) - .setMIFlag(MachineInstr::FrameDestroy); - } - - auto RestoreSGPRFromMemory = [&](Register Reg, const int FI) { - initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); - if (!TmpVGPR) - report_fatal_error("failed to find free scratch register"); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, - FI); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), Reg) - .addReg(TmpVGPR, RegState::Kill); - }; - - auto RestoreSGPRFromVGPRLane = [&](Register Reg, const int FI) { - assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); - ArrayRef Spill = - FuncInfo->getSGPRToVGPRCustomSpills(FI); - assert(Spill.size() == 1); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), Reg) - .addReg(Spill[0].VGPR) - .addImm(Spill[0].Lane); - }; - - if (FPSaveIndex) { - const int FramePtrFI = *FPSaveIndex; - assert(!MFI.isDeadObjectIndex(FramePtrFI)); - if (spilledToMemory(MF, FramePtrFI)) - RestoreSGPRFromMemory(FramePtrReg, FramePtrFI); - else - RestoreSGPRFromVGPRLane(FramePtrReg, FramePtrFI); - } - - if (BPSaveIndex) { - const int BasePtrFI = *BPSaveIndex; - assert(!MFI.isDeadObjectIndex(BasePtrFI)); - if (spilledToMemory(MF, BasePtrFI)) - RestoreSGPRFromMemory(BasePtrReg, BasePtrFI); - else - RestoreSGPRFromVGPRLane(BasePtrReg, BasePtrFI); + for (const auto &Spill : FuncInfo->getCustomSGPRSpills()) { + CustomSGPRSpillBuilder CSB(Spill.first, Spill.second, MBB, MBBI, DL, TII, + TRI, LiveRegs); + CSB.restore(); } Register ScratchExecCopy; @@ -1055,8 +1098,7 @@ I != E; ++I) { 
if (!MFI.isDeadObjectIndex(I) && MFI.getStackID(I) == TargetStackID::SGPRSpill && - (I != FuncInfo->FramePointerSaveIndex && - I != FuncInfo->BasePointerSaveIndex)) { + !FuncInfo->checkIndexInCustomSGPRSpills(I)) { return false; } } @@ -1214,6 +1256,49 @@ } } +// The special SGPR spills, like the ones needed for FP, BP or any reserved +// registers, are delayed until frame lowering. +void SIFrameLowering::determineCustomSGPRSaves(MachineFunction &MF, + BitVector &SavedVGPRs) const { + MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + SIMachineFunctionInfo *MFI = MF.getInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + LivePhysRegs LiveRegs; + LiveRegs.init(*TRI); + // Initially mark callee saved registers as used so we will not choose them + // while looking for scratch SGPRs. + const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); + for (unsigned I = 0; CSRegs[I]; ++I) + LiveRegs.addReg(CSRegs[I]); + + // hasFP only knows about stack objects that already exist. We're now + // determining the stack slots that will be created, so we have to predict + // them. Stack objects force FP usage with calls. + // + // Note a new VGPR CSR may be introduced if one is used for the spill, but we + // don't want to report it here. + // + // FIXME: Is this really hasReservedCallFrame? + const bool WillHaveFP = + FrameInfo.hasCalls() && + (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo)); + + if (WillHaveFP || hasFP(MF)) { + Register FramePtrReg = MFI->getFrameOffsetReg(); + assert(!MFI->hasCustomSGPRSpillEntry(FramePtrReg) && + "Re-reserving spill slot for FP"); + getVGPRSpillLaneOrTempRegister(MF, LiveRegs, FramePtrReg); + } + + if (TRI->hasBasePointer(MF)) { + Register BasePtrReg = TRI->getBaseRegister(); + assert(!MFI->hasCustomSGPRSpillEntry(BasePtrReg) && + "Re-reserving spill slot for BP"); + getVGPRSpillLaneOrTempRegister(MF, LiveRegs, BasePtrReg); + } +} + // Only report VGPRs to generic code. 
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedVGPRs, @@ -1223,7 +1308,6 @@ if (MFI->isEntryFunction()) return; - MachineFrameInfo &FrameInfo = MF.getFrameInfo(); const GCNSubtarget &ST = MF.getSubtarget(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); @@ -1259,42 +1343,12 @@ if (!ST.hasGFX90AInsts()) SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask()); - // hasFP only knows about stack objects that already exist. We're now - // determining the stack slots that will be created, so we have to predict - // them. Stack objects force FP usage with calls. - // - // Note a new VGPR CSR may be introduced if one is used for the spill, but we - // don't want to report it here. - // - // FIXME: Is this really hasReservedCallFrame? - const bool WillHaveFP = - FrameInfo.hasCalls() && - (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo)); + determineCustomSGPRSaves(MF, SavedVGPRs); // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't // allow the default insertion to handle them. for (auto &Reg : MFI->getWWMSpills()) SavedVGPRs.reset(Reg.first); - - LivePhysRegs LiveRegs; - LiveRegs.init(*TRI); - - if (WillHaveFP || hasFP(MF)) { - assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex && - "Re-reserving spill slot for FP"); - getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy, - MFI->FramePointerSaveIndex, true); - } - - if (TRI->hasBasePointer(MF)) { - if (MFI->SGPRForFPSaveRestoreCopy) - LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy); - - assert(!MFI->SGPRForBPSaveRestoreCopy && - !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP"); - getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy, - MFI->BasePointerSaveIndex, false); - } } void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, @@ -1349,29 +1403,31 @@ return true; // Early exit if no callee saved registers are modified! 
const SIMachineFunctionInfo *FuncInfo = MF.getInfo(); - if (!FuncInfo->SGPRForFPSaveRestoreCopy && - !FuncInfo->SGPRForBPSaveRestoreCopy) - return false; - const GCNSubtarget &ST = MF.getSubtarget(); const SIRegisterInfo *RI = ST.getRegisterInfo(); Register FramePtrReg = FuncInfo->getFrameOffsetReg(); Register BasePtrReg = RI->getBaseRegister(); + Register SGPRForFPSaveRestoreCopy = + FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg); + Register SGPRForBPSaveRestoreCopy = + FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg); + if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy) + return false; + unsigned NumModifiedRegs = 0; - if (FuncInfo->SGPRForFPSaveRestoreCopy) + if (SGPRForFPSaveRestoreCopy) NumModifiedRegs++; - if (FuncInfo->SGPRForBPSaveRestoreCopy) + if (SGPRForBPSaveRestoreCopy) NumModifiedRegs++; for (auto &CS : CSI) { - if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) { - CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy); + if (CS.getReg() == FramePtrReg && SGPRForFPSaveRestoreCopy) { + CS.setDstReg(SGPRForFPSaveRestoreCopy); if (--NumModifiedRegs) break; - } else if (CS.getReg() == BasePtrReg && - FuncInfo->SGPRForBPSaveRestoreCopy) { - CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy); + } else if (CS.getReg() == BasePtrReg && SGPRForBPSaveRestoreCopy) { + CS.setDstReg(SGPRForBPSaveRestoreCopy); if (--NumModifiedRegs) break; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -487,6 +487,13 @@ // the serialization easier. ReservedRegSet WWMReservedRegs; + using CustomSGPRSpillsMap = DenseMap; + // To track the custom SGPR spill method used for a CSR SGPR register during + // frame lowering. Even though the SGPR spills are handled during the + // SILowerSGPRSpills pass, some special handling is needed later during the + // PrologEpilogInserter. 
+ CustomSGPRSpillsMap CustomSGPRSpills; + DenseMap VGPRToAGPRSpills; // AGPRs used for VGPR spills. @@ -511,19 +518,6 @@ VGPRForAGPRCopy = NewVGPRForAGPRCopy; } -public: // FIXME - /// If this is set, an SGPR used for save/restore of the register used for the - /// frame pointer. - Register SGPRForFPSaveRestoreCopy; - Optional FramePointerSaveIndex; - - /// If this is set, an SGPR used for save/restore of the register used for the - /// base pointer. - Register SGPRForBPSaveRestoreCopy; - Optional BasePointerSaveIndex; - - bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg); - public: SIMachineFunctionInfo(const MachineFunction &MF); SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default; @@ -552,6 +546,48 @@ const WWMSpillsMap &getWWMSpills() const { return WWMSpills; } const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; } + const CustomSGPRSpillsMap &getCustomSGPRSpills() const { + return CustomSGPRSpills; + } + + void addToCustomSGPRSpills(Register Reg, CustomSGPRSaveInfo SI) { + CustomSGPRSpills.insert(std::make_pair(Reg, SI)); + } + + // Check if an entry has been created for \p Reg in the custom SGPR spills. Return true + // on success and false otherwise. + bool hasCustomSGPRSpillEntry(Register Reg) const { + return CustomSGPRSpills.find(Reg) != CustomSGPRSpills.end(); + } + + // Get the scratch SGPR if allocated to save/restore \p Reg. + Register getScratchSGPRCopyDstReg(Register Reg) const { + auto I = CustomSGPRSpills.find(Reg); + if (I != CustomSGPRSpills.end() && + I->second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR) + return I->second.getReg(); + + return AMDGPU::NoRegister; + } + + // Get all scratch SGPRs allocated to copy/restore the custom SGPR spills. 
+ void getAllScratchSGPRCopyDstRegs(SmallVectorImpl &Regs) const { + for (const auto &SI : CustomSGPRSpills) { + if (SI.second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR) + Regs.push_back(SI.second.getReg()); + } + } + + // Check if \p FI is allocated for any custom spill to a VGPR lane. + bool checkIndexInCustomSGPRSpills(int FI) const { + return find_if(CustomSGPRSpills, + [FI](const std::pair &SI) { + return SI.second.getKind() == + SGPRSaveKind::SPILL_TO_VGPR_LANE && + SI.second.getIndex() == FI; + }) != CustomSGPRSpills.end(); + } + ArrayRef getSGPRToVGPRCustomSpills(int FrameIndex) const { auto I = SGPRToVGPRCustomSpills.find(FrameIndex); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -281,16 +281,6 @@ VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment))); } -bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, - MCPhysReg Reg) { - for (unsigned I = 0; CSRegs[I]; ++I) { - if (CSRegs[I] == Reg) - return true; - } - - return false; -} - bool SIMachineFunctionInfo::allocateVGPRForSGPRSpills(MachineFunction &MF, int FI, unsigned LaneIndex) { @@ -481,13 +471,13 @@ bool HaveSGPRToMemory = false; if (ResetSGPRSpillStackIDs) { - // All other SPGRs must be allocated on the default stack, so reset the + // All other SGPRs must be allocated on the default stack, so reset the // stack ID. 
- for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e; - ++i) { - if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) { - if (MFI.getStackID(i) == TargetStackID::SGPRSpill) { - MFI.setStackID(i, TargetStackID::Default); + for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E; + ++I) { + if (!checkIndexInCustomSGPRSpills(I)) { + if (MFI.getStackID(I) == TargetStackID::SGPRSpill) { + MFI.setStackID(I, TargetStackID::Default); HaveSGPRToMemory = true; } } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -27,6 +27,34 @@ class RegisterBank; struct SGPRSpillBuilder; +// A CSR SGPR value can be preserved inside a callee using one of the following +// methods. +// 1. Copy to an unused scratch SGPR. +// 2. Spill to a VGPR lane. +// 3. Spill to memory via a scratch VGPR. +// class CustomSGPRSaveInfo represents the save/restore method used for an SGPR +// at function prolog/epilog. +enum class SGPRSaveKind : uint8_t { + COPY_TO_SCRATCH_SGPR, + SPILL_TO_VGPR_LANE, + SPILL_TO_MEM +}; + +class CustomSGPRSaveInfo { + SGPRSaveKind Kind; + union { + int Index; + Register Reg; + }; + +public: + CustomSGPRSaveInfo(SGPRSaveKind K, int I) : Kind(K), Index(I) {} + CustomSGPRSaveInfo(SGPRSaveKind K, Register R) : Kind(K), Reg(R) {} + Register getReg() const { return Reg; } + int getIndex() const { return Index; } + SGPRSaveKind getKind() const { return Kind; } +}; + class SIRegisterInfo final : public AMDGPUGenRegisterInfo { private: const GCNSubtarget &ST;