diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -95,6 +95,10 @@
   cl::desc("Do not align and prefetch loops"),
   cl::init(false));
 
+static cl::opt<bool> VGPRReserveforSGPRSpill(
+    "amdgpu-reserve-vgpr-for-sgpr-spill",
+    cl::desc("Allocates one VGPR for future SGPR Spill"), cl::init(false));
+
 static bool hasFP32Denormals(const MachineFunction &MF) {
   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   return Info->getMode().allFP32Denormals();
@@ -10755,6 +10759,15 @@
   }
 
   TargetLoweringBase::finalizeLowering(MF);
+
+  // If "amdgpu-reserve-vgpr-for-sgpr-spill" is set, allocate a VGPR now for
+  // future SGPR spills, using a temporary stack object that is then removed.
+  if (VGPRReserveforSGPRSpill) {
+    int FI = MF.getFrameInfo().CreateStackObject(4, 4, false, nullptr,
+                                                 TargetStackID::SGPRSpill);
+    Info->allocateSGPRSpillToVGPR(MF, FI);
+    MF.getFrameInfo().RemoveStackObject(FI);
+  }
 }
 
 void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -308,6 +308,9 @@
   for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
     reserveRegisterTuples(Reserved, Reg);
 
+  for (const auto &SSpill : MFI->getSGPRSpillVGPRs())
+    reserveRegisterTuples(Reserved, SSpill.VGPR);
+
   return Reserved;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple amdgcn-amd-amdhsa --amdgpu-reserve-vgpr-for-sgpr-spill -mcpu=gfx803 -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+declare i32 @child_func(i32 %0, [255 x i32] %b) #0
+; GCN-LABEL: {{^}}parent_func:
+; GCN: v_writelane_b32 v32, s34, 4
+; GCN: v_writelane_b32 v32, s30, 2
+; GCN: v_writelane_b32 v32, s31, 3
+; GCN: v_readlane_b32 s6, v32, 2
+; GCN: v_readlane_b32 s7, v32, 3
+; GCN: v_readlane_b32 s34, v32, 4
+; GCN: ; NumVgprs: 256
+define fastcc i32 @parent_func(i32 %0, i32 %1, [255 x i32] %b) #1 {
+entry:
+  %ret0 = tail call fastcc i32 @child_func(i32 %0, [255 x i32] %b)
+  %res0 = add i32 %ret0, %0
+  ret i32 %res0
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "no-frame-pointer-elim"="true" }