Index: llvm/lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1218,7 +1218,11 @@ } } - FuncInfo->removeDeadFrameIndices(MFI); + // At this point we've already allocated all spilled SGPRs to VGPRs if we + // can. Any remaining SGPR spills will go to memory, so move them back to the + // default stack. + bool HaveSGPRToVMemSpill = + FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true); assert(allSGPRSpillsAreDead(MF) && "SGPR spill should have been removed in SILowerSGPRSpills"); @@ -1230,6 +1234,13 @@ // Add an emergency spill slot RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI)); + + // If we are spilling SGPRs to memory with a large frame, we may need a + // second VGPR emergency frame index. + if (HaveSGPRToVMemSpill && + allocateScavengingFrameIndexesNearIncomingSP(MF)) { + RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false)); + } } } Index: llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -372,7 +372,7 @@ // free frame index ids by the later pass(es) like "stack slot coloring" // which in turn could mess-up with the book keeping of "frame index to VGPR // lane". 
- FuncInfo->removeDeadFrameIndices(MFI); + FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false); MadeChange = true; } else if (FuncInfo->VGPRReservedForSGPRSpill) { Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -551,7 +551,11 @@ bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); bool reserveVGPRforSGPRSpills(MachineFunction &MF); bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR); - void removeDeadFrameIndices(MachineFrameInfo &MFI); + + /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill + /// to the default stack. + bool removeDeadFrameIndices(MachineFrameInfo &MFI, + bool ResetSGPRSpillStackIDs); int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI); Optional getOptionalScavengeFI() const { return ScavengeFI; } Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -428,7 +428,8 @@ return Spill.FullyAllocated; } -void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) { +bool SIMachineFunctionInfo::removeDeadFrameIndices( + MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) { // Remove dead frame indices from function frame, however keep FP & BP since // spills for them haven't been inserted yet. And also make sure to remove the // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could @@ -441,17 +442,28 @@ } } - // All other SPGRs must be allocated on the default stack, so reset the stack - // ID. 
- for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e; - ++i) - if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) - MFI.setStackID(i, TargetStackID::Default); + bool HaveSGPRToMemory = false; + + if (ResetSGPRSpillStackIDs) { + // All other SGPRs must be allocated on the default stack, so reset the + // stack ID. + for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e; + ++i) { + if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) { + if (MFI.getStackID(i) == TargetStackID::SGPRSpill) { + MFI.setStackID(i, TargetStackID::Default); + HaveSGPRToMemory = true; + } + } + } + } for (auto &R : VGPRToAGPRSpills) { if (R.second.FullyAllocated) MFI.RemoveStackObject(R.first); } + + return HaveSGPRToMemory; } int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI, Index: llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir @@ -0,0 +1,54 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=false -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s + +# Check that we allocate 2 emergency stack slots if we're spilling +# SGPRs to memory and potentially have an offset larger than fits in +# the addressing mode of the memory instructions. 
+ +# CHECK-LABEL: name: test +# CHECK: stack: +# CHECK-NEXT: - { id: 0, name: '', type: spill-slot, offset: 8, size: 4, alignment: 4, +# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# CHECK-NEXT: - { id: 1, name: '', type: default, offset: 12, size: 4096, alignment: 4, +# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# CHECK-NEXT: - { id: 2, name: '', type: default, offset: 0, size: 4, alignment: 4, +# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# CHECK-NEXT: - { id: 3, name: '', type: default, offset: 4, size: 4, alignment: 4, +# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + + +# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) +# CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr0 +# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) +# CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + + +# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) +# CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) +# 
CHECK-NEXT: $sgpr10 = V_READLANE_B32 killed $vgpr0, 0 +# CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) +--- +name: test +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } + - { id: 1, size: 4096, alignment: 4 } +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledSGPRs: true +body: | + bb.0: + liveins: $sgpr30_sgpr31, $sgpr10, $sgpr11 + S_CMP_EQ_U32 0, 0, implicit-def $scc + SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + S_SETPC_B64 $sgpr30_sgpr31, implicit $scc +...