diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -257,6 +257,10 @@ if (Reg.VGPR == ReservedVGPR) { MBB.removeLiveIn(ReservedVGPR); MBB.addLiveIn(LowestAvailableVGPR); + + if (Reg.FI) + MF.getFrameInfo().RemoveStackObject(*Reg.FI); + Optional FI; if (FuncInfo->isCalleeSavedReg(CSRegs, LowestAvailableVGPR)) FI = FrameInfo.CreateSpillStackObject(4, Align(4)); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -349,10 +349,21 @@ const GCNSubtarget &ST = MF.getSubtarget(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + + const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); Register LaneVGPR = TRI->findUnusedRegister( MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true); - SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, None)); + + Optional CSRSpillFI; + if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs && + isCalleeSavedReg(CSRegs, LaneVGPR)) { + CSRSpillFI = FrameInfo.CreateSpillStackObject(4, Align(4)); + } + + SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI)); FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR; return true; } @@ -575,6 +586,9 @@ MachineFunction &MF) { for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) { if (i->VGPR == ReservedVGPR) { + if (i->FI) + MF.getFrameInfo().RemoveStackObject(*i->FI); + SpillVGPRs.erase(i); for (MachineBasicBlock &MBB : MF) { diff --git a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll --- a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll @@ -6,16 +6,20 @@ } ; GCN-LABEL: {{^}}parent_func: -; CHECK: v_writelane_b32 v255, s33, 2 -; CHECK: v_writelane_b32 v255, s30, 0 -; CHECK: v_writelane_b32 v255, s31, 1 +; CHECK: buffer_store_dword v255, off, s[0:3], s32 +; CHECK: v_writelane_b32 v255, s33, 2 +; CHECK: v_writelane_b32 v255, s30, 0 +; CHECK: v_writelane_b32 v255, s31, 1 ; CHECK: s_swappc_b64 s[30:31], s[4:5] -; CHECK: v_readlane_b32 s4, v255, 0 +; CHECK: v_readlane_b32 s4, v255, 0 ; CHECK: v_readlane_b32 s5, v255, 1 -; CHECK: v_readlane_b32 s33, v255, 2 +; CHECK: v_readlane_b32 s33, v255, 2 ; GCN: ; NumVgprs: 256 define void @parent_func() #0 { + %alloca = alloca i32, align 4, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %alloca + call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}