Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -827,10 +827,6 @@ MachineFrameInfo &FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); - assert(SrcReg != MFI->getStackPtrOffsetReg() && - SrcReg != MFI->getFrameOffsetReg() && - SrcReg != MFI->getScratchWaveOffsetReg()); - unsigned Size = FrameInfo.getObjectSize(FrameIndex); unsigned Align = FrameInfo.getObjectAlignment(FrameIndex); MachinePointerInfo PtrInfo Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -658,6 +658,10 @@ if (SpillToSMEM && OnlyToVGPR) return false; + assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() && + SuperReg != MFI->getFrameOffsetReg() && + SuperReg != MFI->getScratchWaveOffsetReg())); + assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); unsigned OffsetReg = AMDGPU::M0; Index: test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stress-regalloc=1 < %s | FileCheck -check-prefix=GCN %s + +; The CSR copy of s5 can reach storeRegToStackSlot; check that it is spilled to +; a VGPR lane around the call instead of asserting.
+
+; GCN-LABEL: {{^}}spill_csr_s5_copy:
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
+; GCN: v_writelane_b32 v32, s5, 2
+; GCN: s_swappc_b64
+; GCN: v_readlane_b32 s5, v32, 2
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 9
+; GCN: buffer_store_dword [[K]], off, s[0:3], s5 offset:4
+; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Reload
+; GCN: s_setpc_b64
+define void @spill_csr_s5_copy() #0 { ; test body: one stack slot, one call to @func, one volatile store to the slot
+bb:
+  %alloca = alloca i32, addrspace(5) ; stack object in addrspace(5); destination of the volatile store below
+  %tmp = tail call i64 @func() #1 ; the only call here; presumably what the s_swappc_b64 check matches, with s5 saved to and restored from v32 lane 2 around it
+  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* null, i64 %tmp ; indexes off a null addrspace(1) pointer using the call result
+  %tmp2 = load i32, i32 addrspace(1)* %tmp1, align 4 ; non-volatile load; its value only feeds %tmp3
+  %tmp3 = zext i32 %tmp2 to i64 ; %tmp3 has no users in this function
+  store volatile i32 9, i32 addrspace(5)* %alloca ; the constant 9 is what the v_mov_b32_e32 and the following buffer_store_dword checks look for
+  ret void
+}
+
+declare i64 @func() ; external callee; only declared, no body in this file
+
+attributes #0 = { nounwind } ; attribute group used by @spill_csr_s5_copy
+attributes #1 = { nounwind readnone } ; attribute group used at the call site of @func