diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1137,6 +1137,7 @@ BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false); bool SeenDbgInstr = false; + bool HasUnreservedSpill = false; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { @@ -1160,6 +1161,7 @@ SpillFIs.set(FI); continue; } + HasUnreservedSpill |= FuncInfo->VGPRToAGPRSpillNeedsReserve(FI); } else if (TII->isStoreToStackSlot(MI, FrameIndex) || TII->isLoadFromStackSlot(MI, FrameIndex)) if (!MFI.isFixedObjectIndex(FrameIndex)) @@ -1167,6 +1169,11 @@ } } + if (HasUnreservedSpill) { + MRI.freezeReservedRegs(MF); + HasUnreservedSpill = false; + } + // Stack slot coloring may assign different objects to the same stack slot. // If not, then the VGPR to AGPR spill slot is dead. for (unsigned FI : SpillFIs.set_bits()) diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -445,6 +445,7 @@ SmallVector Lanes; bool FullyAllocated = false; bool IsDead = false; + bool NeedsReserve = false; }; // Track VGPRs reserved for WWM. 
@@ -561,6 +562,16 @@ I->second.IsDead = true; } + bool VGPRToAGPRSpillNeedsReserve(int FrameIndex) { + auto I = VGPRToAGPRSpills.find(FrameIndex); + if (I == VGPRToAGPRSpills.end()) + return false; + + bool IsUnreserved = !I->getSecond().NeedsReserve; + I->second.NeedsReserve = true; + return IsUnreserved; + } + bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const; bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -426,6 +426,7 @@ OtherUsedRegs.set(*NextSpillReg); SpillRegs.push_back(*NextSpillReg); Spill.Lanes[I] = *NextSpillReg++; + Spill.NeedsReserve = true; } return Spill.FullyAllocated; diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir --- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir @@ -269,10 +269,10 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 
implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -762,11 +762,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: 
$vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -1286,11 +1286,11 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, 
implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -1782,11 +1782,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr40, 0, 0, implicit $exec, implicit 
$flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -2308,12 +2308,12 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 8, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN 
$vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 8, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -2807,11 +2807,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into 
%stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -3329,10 +3329,10 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-NEXT: 
$vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -3822,11 +3822,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD $agpr0, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD $agpr0, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; 
GFX90A-FLATSCR-NEXT: bb.1: @@ -4345,11 +4345,11 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, 
addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -4841,11 +4841,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -5365,12 +5365,12 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -5864,11 +5864,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/copyVGPRClobberSpillVGPR.mir b/llvm/test/CodeGen/AMDGPU/copyVGPRClobberSpillVGPR.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/copyVGPRClobberSpillVGPR.mir @@ -0,0 +1,511 @@ +# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=prologepilog %s -o - | FileCheck --check-prefix=GFX908 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=prologepilog %s -o - | FileCheck --check-prefix=GFX90A %s + +--- | + + define amdgpu_kernel void @test_spill() #0 { + ; GFX908-LABEL: test_spill: + ; GFX908: ; %bb.0: + ; GFX908-NEXT: ; implicit-def: $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111 + ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a96 + ; GFX908-NEXT: ; implicit-def: $vgpr0 + ; GFX908-NEXT: ; implicit-def: $vgpr1 + ; GFX908-NEXT: ; implicit-def: $vgpr2 + ; GFX908-NEXT: ; implicit-def: $vgpr3 + ; GFX908-NEXT: ; implicit-def: $vgpr4 + ; GFX908-NEXT: ; implicit-def: $vgpr5 + ; GFX908-NEXT: ; implicit-def: $vgpr6 + ; GFX908-NEXT: ; implicit-def: $vgpr7 + ; GFX908-NEXT: ; implicit-def: $vgpr8 + ; GFX908-NEXT: ; implicit-def: $vgpr9 + ; GFX908-NEXT: ; implicit-def: $vgpr10 + ; GFX908-NEXT: ; implicit-def: $vgpr11 + ; GFX908-NEXT: ; implicit-def: $vgpr12 + ; GFX908-NEXT: ; implicit-def: $vgpr13 + ; GFX908-NEXT: ; implicit-def: $vgpr14 + ; GFX908-NEXT: ; implicit-def: $vgpr15 + ; GFX908-NEXT: ; implicit-def: $vgpr16 + ; GFX908-NEXT: ; implicit-def: $vgpr17 + ; GFX908-NEXT: ; implicit-def: $vgpr18 + ; 
GFX908-NEXT: ; implicit-def: $vgpr19 + ; GFX908-NEXT: ; implicit-def: $vgpr20 + ; GFX908-NEXT: ; implicit-def: $vgpr21 + ; GFX908-NEXT: ; implicit-def: $vgpr22 + ; GFX908-NEXT: ; implicit-def: $vgpr23 + ; GFX908-NEXT: ; implicit-def: $vgpr24 + ; GFX908-NEXT: ; implicit-def: $vgpr25 + ; GFX908-NEXT: ; implicit-def: $vgpr26 + ; GFX908-NEXT: ; implicit-def: $vgpr27 + ; GFX908-NEXT: ; implicit-def: $vgpr28 + ; GFX908-NEXT: ; implicit-def: $vgpr29 + ; GFX908-NEXT: ; implicit-def: $vgpr30 + ; GFX908-NEXT: ; implicit-def: $vgpr31 + ; GFX908-NEXT: ; implicit-def: $vgpr32 + ; GFX908-NEXT: ; implicit-def: $vgpr33 + ; GFX908-NEXT: ; implicit-def: $vgpr34 + ; GFX908-NEXT: ; implicit-def: $vgpr35 + ; GFX908-NEXT: ; implicit-def: $vgpr36 + ; GFX908-NEXT: ; implicit-def: $vgpr37 + ; GFX908-NEXT: ; implicit-def: $vgpr38 + ; GFX908-NEXT: ; implicit-def: $vgpr39 + ; GFX908-NEXT: ; implicit-def: $vgpr40 + ; GFX908-NEXT: ; implicit-def: $vgpr41 + ; GFX908-NEXT: ; implicit-def: $vgpr42 + ; GFX908-NEXT: ; implicit-def: $vgpr43 + ; GFX908-NEXT: ; implicit-def: $vgpr44 + ; GFX908-NEXT: ; implicit-def: $vgpr45 + ; GFX908-NEXT: ; implicit-def: $vgpr46 + ; GFX908-NEXT: ; implicit-def: $vgpr47 + ; GFX908-NEXT: ; implicit-def: $vgpr48 + ; GFX908-NEXT: ; implicit-def: $vgpr49 + ; GFX908-NEXT: ; implicit-def: $vgpr50 + ; GFX908-NEXT: ; implicit-def: $vgpr51 + ; GFX908-NEXT: ; implicit-def: $vgpr52 + ; GFX908-NEXT: ; implicit-def: $vgpr53 + ; GFX908-NEXT: ; implicit-def: $vgpr54 + ; GFX908-NEXT: ; implicit-def: $vgpr55 + ; GFX908-NEXT: ; implicit-def: $vgpr56 + ; GFX908-NEXT: ; implicit-def: $vgpr57 + ; GFX908-NEXT: ; implicit-def: $vgpr58 + ; GFX908-NEXT: ; implicit-def: $vgpr59 + ; GFX908-NEXT: ; implicit-def: $vgpr60 + ; GFX908-NEXT: ; implicit-def: $vgpr61 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a64, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a97 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a65, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a98 + ; 
GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a66, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a99 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a67, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a100 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a68, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a101 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a69, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a102 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a70, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a103 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a71, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a104 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a72, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a105 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a73, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a106 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a74, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a107 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a75, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a108 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a76, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a109 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a77, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a110 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a78, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a111 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a79, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a96 ; Reload Reuse + ; GFX908-NEXT: v_accvgpr_read_b32 v62, a111 ; Reload Reuse + ; GFX908-NEXT: s_nop 0 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a97 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill + ; 
GFX908-NEXT: v_accvgpr_read_b32 v63, a98 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a99 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a100 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a101 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a102 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a103 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a104 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a105 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a106 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a107 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a108 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a109 ; Reload Reuse + ; 
GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a110 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a96 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a97 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a98 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a99 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a100 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a101 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a102 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a103 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a104 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a105 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, 
s[0:3], s32 offset:100 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a106 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a107 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a108 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a109 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a110 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a111 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill + ; + ; GFX90A-LABEL: test_spill: + ; GFX90A: ; %bb.0: + ; GFX90A-NEXT: ; implicit-def: $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111 + ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a96 + ; GFX90A-NEXT: ; implicit-def: $vgpr0 + ; GFX90A-NEXT: ; implicit-def: $vgpr1 + ; GFX90A-NEXT: ; implicit-def: $vgpr2 + ; GFX90A-NEXT: ; implicit-def: $vgpr3 + ; GFX90A-NEXT: ; implicit-def: $vgpr4 + ; GFX90A-NEXT: ; implicit-def: $vgpr5 + ; GFX90A-NEXT: ; implicit-def: $vgpr6 + ; GFX90A-NEXT: ; implicit-def: $vgpr7 + ; GFX90A-NEXT: ; implicit-def: $vgpr8 + ; GFX90A-NEXT: ; implicit-def: $vgpr9 + ; GFX90A-NEXT: ; implicit-def: $vgpr10 + ; GFX90A-NEXT: ; implicit-def: $vgpr11 + ; GFX90A-NEXT: ; implicit-def: $vgpr12 + ; GFX90A-NEXT: ; implicit-def: $vgpr13 + ; 
GFX90A-NEXT: ; implicit-def: $vgpr14 + ; GFX90A-NEXT: ; implicit-def: $vgpr15 + ; GFX90A-NEXT: ; implicit-def: $vgpr16 + ; GFX90A-NEXT: ; implicit-def: $vgpr17 + ; GFX90A-NEXT: ; implicit-def: $vgpr18 + ; GFX90A-NEXT: ; implicit-def: $vgpr19 + ; GFX90A-NEXT: ; implicit-def: $vgpr20 + ; GFX90A-NEXT: ; implicit-def: $vgpr21 + ; GFX90A-NEXT: ; implicit-def: $vgpr22 + ; GFX90A-NEXT: ; implicit-def: $vgpr23 + ; GFX90A-NEXT: ; implicit-def: $vgpr24 + ; GFX90A-NEXT: ; implicit-def: $vgpr25 + ; GFX90A-NEXT: ; implicit-def: $vgpr26 + ; GFX90A-NEXT: ; implicit-def: $vgpr27 + ; GFX90A-NEXT: ; implicit-def: $vgpr28 + ; GFX90A-NEXT: ; implicit-def: $vgpr29 + ; GFX90A-NEXT: ; implicit-def: $vgpr30 + ; GFX90A-NEXT: ; implicit-def: $vgpr31 + ; GFX90A-NEXT: ; implicit-def: $vgpr32 + ; GFX90A-NEXT: ; implicit-def: $vgpr33 + ; GFX90A-NEXT: ; implicit-def: $vgpr34 + ; GFX90A-NEXT: ; implicit-def: $vgpr35 + ; GFX90A-NEXT: ; implicit-def: $vgpr36 + ; GFX90A-NEXT: ; implicit-def: $vgpr37 + ; GFX90A-NEXT: ; implicit-def: $vgpr38 + ; GFX90A-NEXT: ; implicit-def: $vgpr39 + ; GFX90A-NEXT: ; implicit-def: $vgpr40 + ; GFX90A-NEXT: ; implicit-def: $vgpr41 + ; GFX90A-NEXT: ; implicit-def: $vgpr42 + ; GFX90A-NEXT: ; implicit-def: $vgpr43 + ; GFX90A-NEXT: ; implicit-def: $vgpr44 + ; GFX90A-NEXT: ; implicit-def: $vgpr45 + ; GFX90A-NEXT: ; implicit-def: $vgpr46 + ; GFX90A-NEXT: ; implicit-def: $vgpr47 + ; GFX90A-NEXT: ; implicit-def: $vgpr48 + ; GFX90A-NEXT: ; implicit-def: $vgpr49 + ; GFX90A-NEXT: ; implicit-def: $vgpr50 + ; GFX90A-NEXT: ; implicit-def: $vgpr51 + ; GFX90A-NEXT: ; implicit-def: $vgpr52 + ; GFX90A-NEXT: ; implicit-def: $vgpr53 + ; GFX90A-NEXT: ; implicit-def: $vgpr54 + ; GFX90A-NEXT: ; implicit-def: $vgpr55 + ; GFX90A-NEXT: ; implicit-def: $vgpr56 + ; GFX90A-NEXT: ; implicit-def: $vgpr57 + ; GFX90A-NEXT: ; implicit-def: $vgpr58 + ; GFX90A-NEXT: ; implicit-def: $vgpr59 + ; GFX90A-NEXT: ; implicit-def: $vgpr60 + ; GFX90A-NEXT: ; implicit-def: $vgpr61 + ; GFX90A-NEXT: s_nop 1 + ; 
GFX90A-NEXT: v_accvgpr_write_b32 a64, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a97 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a65, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a98 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a66, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a99 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a67, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a100 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a68, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a101 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a69, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a102 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a70, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a103 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a71, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a104 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a72, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a105 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a73, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a106 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a74, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a107 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a75, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a108 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a76, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a109 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a77, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a110 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a78, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a111 + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: v_accvgpr_write_b32 a79, v63 + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a96 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v62, a111 ; Reload Reuse + ; GFX90A-NEXT: s_nop 0 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 
4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a97 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a98 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a99 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a100 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a101 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a102 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a103 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a104 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a105 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a106 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a107 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a108 ; 
Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a109 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a110 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a96 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a97 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a98 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a99 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a100 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a101 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a102 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a103 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a104 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: 
buffer_store_dword v63, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a105 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a106 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a107 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a108 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a109 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a110 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a111 ; Reload Reuse + ; GFX90A-NEXT: s_nop 1 + ; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill + ret void + } + + attributes #0 = { "amdgpu-waves-per-eu"="4,4" } + +... 
+---
+name: test_spill  # MIR input: copies a96-a111 to a64-a79, then spills the a96-a111 tuple to two separate 64-byte spill slots.
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 4 }  # target of the first SI_SPILL_AV512_SAVE (64 bytes = one s512 value)
+  - { id: 1, name: '', type: spill-slot, offset: 0, size: 64, alignment: 4 }  # target of the second SI_SPILL_AV512_SAVE
+
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: '$sgpr32'
+  hasSpilledVGPRs: true
+body: |
+  bb.0:
+    $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF ; v0-v61 are all defined below; presumably this leaves only v62/v63 available to the spill code - TODO confirm
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr7 = IMPLICIT_DEF
+    $vgpr8 = IMPLICIT_DEF
+    $vgpr9 = IMPLICIT_DEF
+    $vgpr10 = IMPLICIT_DEF
+    $vgpr11 = IMPLICIT_DEF
+    $vgpr12 = IMPLICIT_DEF
+    $vgpr13 = IMPLICIT_DEF
+    $vgpr14 = IMPLICIT_DEF
+    $vgpr15 = IMPLICIT_DEF
+    $vgpr16 = IMPLICIT_DEF
+    $vgpr17 = IMPLICIT_DEF
+    $vgpr18 = IMPLICIT_DEF
+    $vgpr19 = IMPLICIT_DEF
+    $vgpr20 = IMPLICIT_DEF
+    $vgpr21 = IMPLICIT_DEF
+    $vgpr22 = IMPLICIT_DEF
+    $vgpr23 = IMPLICIT_DEF
+    $vgpr24 = IMPLICIT_DEF
+    $vgpr25 = IMPLICIT_DEF
+    $vgpr26 = IMPLICIT_DEF
+    $vgpr27 = IMPLICIT_DEF
+    $vgpr28 = IMPLICIT_DEF
+    $vgpr29 = IMPLICIT_DEF
+    $vgpr30 = IMPLICIT_DEF
+    $vgpr31 = IMPLICIT_DEF
+    $vgpr32 = IMPLICIT_DEF
+    $vgpr33 = IMPLICIT_DEF
+    $vgpr34 = IMPLICIT_DEF
+    $vgpr35 = IMPLICIT_DEF
+    $vgpr36 = IMPLICIT_DEF
+    $vgpr37 = IMPLICIT_DEF
+    $vgpr38 = IMPLICIT_DEF
+    $vgpr39 = IMPLICIT_DEF
+    $vgpr40 = IMPLICIT_DEF
+    $vgpr41 = IMPLICIT_DEF
+    $vgpr42 = IMPLICIT_DEF
+    $vgpr43 = IMPLICIT_DEF
+    $vgpr44 = IMPLICIT_DEF
+    $vgpr45 = IMPLICIT_DEF
+    $vgpr46 = IMPLICIT_DEF
+    $vgpr47 = IMPLICIT_DEF
+    $vgpr48 = IMPLICIT_DEF
+    $vgpr49 = IMPLICIT_DEF
+    $vgpr50 = IMPLICIT_DEF
+    $vgpr51 = IMPLICIT_DEF
+    $vgpr52 = IMPLICIT_DEF
+    $vgpr53 = IMPLICIT_DEF
+    $vgpr54 = IMPLICIT_DEF
+    $vgpr55 = IMPLICIT_DEF
+    $vgpr56 = IMPLICIT_DEF
+    $vgpr57 = IMPLICIT_DEF
+    $vgpr58 = IMPLICIT_DEF
+    $vgpr59 = IMPLICIT_DEF
+    $vgpr60 = IMPLICIT_DEF
+    $vgpr61 = IMPLICIT_DEF
+
+    $agpr64_agpr65_agpr66_agpr67_agpr68_agpr69_agpr70_agpr71_agpr72_agpr73_agpr74_agpr75_agpr76_agpr77_agpr78_agpr79 = COPY $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111, implicit $exec
+    SI_SPILL_AV512_SAVE killed $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) ; NOTE(review): the tuple is marked killed here yet is read again by the next spill - confirm this is intended
+    SI_SPILL_AV512_SAVE $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111, %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5) ; memory operand names %stack.1, the same slot as the frame-index operand
+...