diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -458,6 +458,9 @@ assert(ScratchWaveOffsetReg); if (MF.getFrameInfo().hasCalls()) { + // Note: Spilling code in SIRegisterInfo.cpp assumes that if !hasCalls() it + // can overwrite StackPtrOffsetReg - updates are required there if changes + // are made here to that assumption. Register SPReg = MFI->getStackPtrOffsetReg(); assert(SPReg != AMDGPU::SP_REG); BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -738,6 +738,7 @@ bool Scavenged = false; MCRegister SOffset = ScratchOffsetReg; + bool UninitStackPtrOffset = false; const unsigned EltSize = 4; const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg); @@ -776,8 +777,13 @@ // add the offset directly to the ScratchOffset or StackPtrOffset // register, and then subtract the offset after the spill to return the // register to it's original value. - if (!ScratchOffsetReg) + // In the case where StackPtrOffset is not initialized/otherwise used + // (hasCalls is false), we can just use the register directly with no + // adjustment required. + if (!ScratchOffsetReg) { ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg(); + UninitStackPtrOffset = !MFI.hasCalls(); + } SOffset = ScratchOffsetReg; ScratchOffsetRegDelta = Offset; } else { @@ -787,7 +793,7 @@ if (!SOffset) report_fatal_error("could not scavenge SGPR to spill in entry function"); - if (ScratchOffsetReg == AMDGPU::NoRegister) { + if (ScratchOffsetReg == AMDGPU::NoRegister || UninitStackPtrOffset) { BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset) .addImm(Offset); } else { @@ -855,7 +861,7 @@ MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState); } - if (ScratchOffsetRegDelta != 0) { + if (!UninitStackPtrOffset && ScratchOffsetRegDelta != 0) { // Subtract the offset we added to the ScratchOffset register. BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset) .addReg(SOffset) diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll --- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -35,9 +35,8 @@ } ; CHECK-LABEL: test_limited_sgpr -; GFX6: s_add_u32 s32, s32, 0x[[OFFSET:[0-9]+]] +; GFX6: s_mov_b32 s32, 0x{{[0-9]+}} ; GFX6-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], s32 -; GFX6-NEXT: s_sub_u32 s32, s32, 0x[[OFFSET:[0-9]+]] ; GFX6: NumSgprs: 48 ; GFX6: ScratchSize: 8624 define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64 x i32> addrspace(1)* %in) #0 {