diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -900,6 +900,33 @@
   /// of reserved registers before allocation begins.
   void freezeReservedRegs(const MachineFunction&);
 
+  /// reserveReg -- Mark a register as reserved so checks like isAllocatable
+  /// will not suggest using it. PhysReg and every register aliasing it are
+  /// added to the reserved set. This should not be used during the middle
+  /// of a function walk, or when liveness info is available.
+  /// freezeReservedRegs() must have been called first.
+  void reserveReg(MCRegister PhysReg, const TargetRegisterInfo *TRI) {
+    assert(reservedRegsFrozen() &&
+           "Reserved registers haven't been frozen yet.");
+    MCRegAliasIterator R(PhysReg, TRI, /*IncludeSelf=*/true);
+    for (; R.isValid(); ++R)
+      ReservedRegs.set(*R);
+  }
+
+  /// unreserveReg -- Mark a register as not reserved so checks like
+  /// isAllocatable will suggest using it. PhysReg and every register aliasing
+  /// it are removed from the reserved set, including aliases that were
+  /// reserved for some other reason. This should not be used during the
+  /// middle of a function walk, or when liveness info is available.
+  /// freezeReservedRegs() must have been called first.
+  void unreserveReg(MCRegister PhysReg, const TargetRegisterInfo *TRI) {
+    assert(reservedRegsFrozen() &&
+           "Reserved registers haven't been frozen yet.");
+    MCRegAliasIterator R(PhysReg, TRI, /*IncludeSelf=*/true);
+    for (; R.isValid(); ++R)
+      ReservedRegs.reset(*R);
+  }
+
   /// reservedRegsFrozen - Returns true after freezeReservedRegs() was called
   /// to ensure the set of reserved registers stays constant.
bool reservedRegsFrozen() const { diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -425,6 +425,7 @@ OtherUsedRegs.set(*NextSpillReg); SpillRegs.push_back(*NextSpillReg); + MRI.reserveReg(*NextSpillReg, TRI); Spill.Lanes[I] = *NextSpillReg++; } diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir --- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir @@ -269,10 +269,10 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from 
%stack.1, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -762,11 +762,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; 
GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -1286,11 +1286,11 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) 
; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -1782,11 +1782,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -2308,12 +2308,12 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 8, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 8, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -2807,11 +2807,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR 
$sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -3329,10 +3329,10 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -3822,11 +3822,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 
5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD $agpr0, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD $agpr0, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -4345,11 +4345,11 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; 
GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -4841,11 +4841,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into 
%stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -5365,12 +5365,12 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 
implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: 
@@ -5864,11 +5864,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll --- 
a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -13,133 +13,133 @@ ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def v[0:31] a[0:15] ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a15 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a31, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a14 +; GFX908-NEXT: v_accvgpr_write_b32 a31, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a14 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a30, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a13 +; GFX908-NEXT: v_accvgpr_write_b32 a30, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a13 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a29, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a12 +; GFX908-NEXT: v_accvgpr_write_b32 a29, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a12 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a28, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a11 +; GFX908-NEXT: v_accvgpr_write_b32 a28, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a11 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a27, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a10 +; GFX908-NEXT: v_accvgpr_write_b32 a27, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a10 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a26, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a9 +; GFX908-NEXT: v_accvgpr_write_b32 a26, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a9 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a25, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a8 +; GFX908-NEXT: v_accvgpr_write_b32 a25, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a8 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a24, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a7 +; GFX908-NEXT: v_accvgpr_write_b32 a24, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a7 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a23, v34 -; 
GFX908-NEXT: v_accvgpr_read_b32 v34, a6 +; GFX908-NEXT: v_accvgpr_write_b32 a23, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a6 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a22, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a5 +; GFX908-NEXT: v_accvgpr_write_b32 a22, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a5 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a21, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a4 +; GFX908-NEXT: v_accvgpr_write_b32 a21, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a4 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a20, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a3 +; GFX908-NEXT: v_accvgpr_write_b32 a20, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a3 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a19, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a2 +; GFX908-NEXT: v_accvgpr_write_b32 a19, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a18, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 +; GFX908-NEXT: v_accvgpr_write_b32 a18, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a17, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a0 +; GFX908-NEXT: v_accvgpr_write_b32 a17, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a0 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a16, v34 +; GFX908-NEXT: v_accvgpr_write_b32 a16, v39 ; GFX908-NEXT: s_nop 0 ; GFX908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v33, v32, a[16:31] ; GFX908-NEXT: s_nop 7 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a0 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v39, a0 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v38, a11 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v37, a12 ; Reload Reuse -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 ; 
4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v36, a13 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v35, a14 ; Reload Reuse -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a2 ; Reload Reuse -; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a3 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX908-NEXT: s_nop 0 +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a3 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a4 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a4 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a5 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a5 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a6 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a6 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a7 ; Reload 
Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a7 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a8 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a8 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a9 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a9 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a10 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a10 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a16, v34 -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a16, v39 +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a0, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX908-NEXT: 
v_accvgpr_write_b32 a0, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a1, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a1, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a2, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a2, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a3, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a3, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a4, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a4, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a5, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a5, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a6, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 
a6, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a7, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a7, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a8, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a8, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a9, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a9, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a10, v34 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_write_b32 a10, v39 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a11, v38 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a12, v37 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a13, v36 ; Reload Reuse @@ -863,133 +863,133 @@ ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def v[0:31] s[0:15] ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_mov_b32_e32 v34, s15 +; GFX908-NEXT: v_mov_b32_e32 v39, s15 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a31, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s14 +; GFX908-NEXT: v_accvgpr_write_b32 a31, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s14 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a30, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s13 +; GFX908-NEXT: v_accvgpr_write_b32 a30, v39 +; GFX908-NEXT: v_mov_b32_e32 
v39, s13 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a29, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s12 +; GFX908-NEXT: v_accvgpr_write_b32 a29, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s12 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a28, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s11 +; GFX908-NEXT: v_accvgpr_write_b32 a28, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s11 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a27, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s10 +; GFX908-NEXT: v_accvgpr_write_b32 a27, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s10 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a26, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s9 +; GFX908-NEXT: v_accvgpr_write_b32 a26, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s9 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a25, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s8 +; GFX908-NEXT: v_accvgpr_write_b32 a25, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s8 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a24, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s7 +; GFX908-NEXT: v_accvgpr_write_b32 a24, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s7 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a23, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s6 +; GFX908-NEXT: v_accvgpr_write_b32 a23, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s6 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a22, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s5 +; GFX908-NEXT: v_accvgpr_write_b32 a22, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s5 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a21, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s4 +; GFX908-NEXT: v_accvgpr_write_b32 a21, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s4 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a20, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s3 +; GFX908-NEXT: v_accvgpr_write_b32 a20, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s3 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a19, v34 -; GFX908-NEXT: 
v_mov_b32_e32 v34, s2 +; GFX908-NEXT: v_accvgpr_write_b32 a19, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a18, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s1 +; GFX908-NEXT: v_accvgpr_write_b32 a18, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s1 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a17, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s0 +; GFX908-NEXT: v_accvgpr_write_b32 a17, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s0 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a16, v34 +; GFX908-NEXT: v_accvgpr_write_b32 a16, v39 ; GFX908-NEXT: s_nop 0 ; GFX908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v33, v32, a[16:31] ; GFX908-NEXT: s_nop 7 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a0 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v39, a0 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v38, a11 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v37, a12 ; Reload Reuse -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v36, a13 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v35, a14 ; Reload Reuse -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a2 ; Reload Reuse -; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a3 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX908-NEXT: s_nop 0 +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; 
GFX908-NEXT: v_accvgpr_read_b32 v39, a3 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a4 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a4 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a5 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a5 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a6 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a6 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a7 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a7 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a8 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a8 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a9 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a9 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: 
buffer_store_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a10 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a10 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a32, v34 -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a32, v39 +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a0, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a0, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a1, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a1, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a2, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a2, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; 
GFX908-NEXT: v_accvgpr_write_b32 a3, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a3, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a4, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a4, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a5, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a5, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a6, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a6, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a7, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a7, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a8, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a8, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: 
v_accvgpr_write_b32 a9, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a9, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a10, v34 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_write_b32 a10, v39 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a11, v38 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a12, v37 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a13, v36 ; Reload Reuse diff --git a/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir b/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir @@ -0,0 +1,417 @@ +# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=prologepilog %s -o - | FileCheck --check-prefix=GFX908 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -start-before=prologepilog %s -o - | FileCheck --check-prefix=GFX90A %s + +--- | + + define amdgpu_kernel void @test_spill() #0 { + ; GFX908-LABEL: test_spill: + ; GFX908: ; %bb.0: + ; GFX908-NEXT: ; implicit-def: $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111 + ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a96 + ; GFX908-NEXT: ; implicit-def: $vgpr0 + ; GFX908-NEXT: ; implicit-def: $vgpr1 + ; GFX908-NEXT: ; implicit-def: $vgpr2 + ; GFX908-NEXT: ; implicit-def: $vgpr3 + ; GFX908-NEXT: ; implicit-def: $vgpr4 + ; GFX908-NEXT: ; implicit-def: $vgpr5 + ; GFX908-NEXT: ; implicit-def: $vgpr6 + ; GFX908-NEXT: ; implicit-def: $vgpr7 + ; GFX908-NEXT: ; implicit-def: $vgpr8 + ; GFX908-NEXT: ; implicit-def: $vgpr9 + ; GFX908-NEXT: ; 
implicit-def: $vgpr10 + ; GFX908-NEXT: ; implicit-def: $vgpr11 + ; GFX908-NEXT: ; implicit-def: $vgpr12 + ; GFX908-NEXT: ; implicit-def: $vgpr13 + ; GFX908-NEXT: ; implicit-def: $vgpr14 + ; GFX908-NEXT: ; implicit-def: $vgpr15 + ; GFX908-NEXT: ; implicit-def: $vgpr16 + ; GFX908-NEXT: ; implicit-def: $vgpr17 + ; GFX908-NEXT: ; implicit-def: $vgpr18 + ; GFX908-NEXT: ; implicit-def: $vgpr19 + ; GFX908-NEXT: ; implicit-def: $vgpr20 + ; GFX908-NEXT: ; implicit-def: $vgpr21 + ; GFX908-NEXT: ; implicit-def: $vgpr22 + ; GFX908-NEXT: ; implicit-def: $vgpr23 + ; GFX908-NEXT: ; implicit-def: $vgpr24 + ; GFX908-NEXT: ; implicit-def: $vgpr25 + ; GFX908-NEXT: ; implicit-def: $vgpr26 + ; GFX908-NEXT: ; implicit-def: $vgpr27 + ; GFX908-NEXT: ; implicit-def: $vgpr28 + ; GFX908-NEXT: ; implicit-def: $vgpr29 + ; GFX908-NEXT: ; implicit-def: $vgpr30 + ; GFX908-NEXT: ; implicit-def: $vgpr31 + ; GFX908-NEXT: ; implicit-def: $vgpr32 + ; GFX908-NEXT: ; implicit-def: $vgpr33 + ; GFX908-NEXT: ; implicit-def: $vgpr34 + ; GFX908-NEXT: ; implicit-def: $vgpr35 + ; GFX908-NEXT: ; implicit-def: $vgpr36 + ; GFX908-NEXT: ; implicit-def: $vgpr37 + ; GFX908-NEXT: ; implicit-def: $vgpr38 + ; GFX908-NEXT: ; implicit-def: $vgpr39 + ; GFX908-NEXT: ; implicit-def: $vgpr40 + ; GFX908-NEXT: ; implicit-def: $vgpr41 + ; GFX908-NEXT: ; implicit-def: $vgpr42 + ; GFX908-NEXT: ; implicit-def: $vgpr43 + ; GFX908-NEXT: ; implicit-def: $vgpr44 + ; GFX908-NEXT: ; implicit-def: $vgpr45 + ; GFX908-NEXT: ; implicit-def: $vgpr46 + ; GFX908-NEXT: ; implicit-def: $vgpr47 + ; GFX908-NEXT: ; implicit-def: $vgpr48 + ; GFX908-NEXT: ; implicit-def: $vgpr49 + ; GFX908-NEXT: ; implicit-def: $vgpr50 + ; GFX908-NEXT: ; implicit-def: $vgpr51 + ; GFX908-NEXT: ; implicit-def: $vgpr52 + ; GFX908-NEXT: ; implicit-def: $vgpr53 + ; GFX908-NEXT: ; implicit-def: $vgpr54 + ; GFX908-NEXT: ; implicit-def: $vgpr55 + ; GFX908-NEXT: ; implicit-def: $vgpr56 + ; GFX908-NEXT: ; implicit-def: $vgpr57 + ; GFX908-NEXT: ; implicit-def: $vgpr58 + ; 
GFX908-NEXT: ; implicit-def: $vgpr59 + ; GFX908-NEXT: ; implicit-def: $vgpr60 + ; GFX908-NEXT: ; implicit-def: $vgpr61 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a64, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a97 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a65, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a98 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a66, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a99 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a67, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a100 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a68, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a101 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a69, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a102 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a70, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a103 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a71, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a104 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a72, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a105 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a73, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a106 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a74, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a107 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a75, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a108 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a76, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a109 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a77, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a110 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a78, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a111 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a79, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a96 ; Reload 
Reuse + ; GFX908-NEXT: v_accvgpr_read_b32 v62, a111 ; Reload Reuse + ; GFX908-NEXT: s_nop 0 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a97 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a98 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a99 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a100 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a101 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a102 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a103 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a104 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a105 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a106 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a107 ; Reload Reuse + ; 
GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a108 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a109 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a110 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a96 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a97 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a98 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a99 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a100 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a101 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a102 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a103 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, 
s[0:3], s32 offset:92 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a104 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a105 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a106 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a107 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a108 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a109 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a110 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a111 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill + ; + ; GFX90A-LABEL: test_spill: + ; GFX90A: ; %bb.0: + ; GFX90A-NEXT: ; implicit-def: $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111 + ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) + ; GFX90A-NEXT: v_accvgpr_mov_b32 a64, a96 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a65, a97 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a66, a98 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a67, a99 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a68, a100 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a69, a101 + ; 
GFX90A-NEXT: v_accvgpr_mov_b32 a70, a102 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a71, a103 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a72, a104 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a73, a105 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a74, a106 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a75, a107 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a76, a108 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a77, a109 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a78, a110 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a79, a111 + ; GFX90A-NEXT: v_accvgpr_read_b32 v77, a96 ; Reload Reuse + ; GFX90A-NEXT: ; implicit-def: $vgpr0 + ; GFX90A-NEXT: ; implicit-def: $vgpr1 + ; GFX90A-NEXT: ; implicit-def: $vgpr2 + ; GFX90A-NEXT: ; implicit-def: $vgpr3 + ; GFX90A-NEXT: ; implicit-def: $vgpr4 + ; GFX90A-NEXT: ; implicit-def: $vgpr5 + ; GFX90A-NEXT: ; implicit-def: $vgpr6 + ; GFX90A-NEXT: ; implicit-def: $vgpr7 + ; GFX90A-NEXT: ; implicit-def: $vgpr8 + ; GFX90A-NEXT: ; implicit-def: $vgpr9 + ; GFX90A-NEXT: ; implicit-def: $vgpr10 + ; GFX90A-NEXT: ; implicit-def: $vgpr11 + ; GFX90A-NEXT: ; implicit-def: $vgpr12 + ; GFX90A-NEXT: ; implicit-def: $vgpr13 + ; GFX90A-NEXT: ; implicit-def: $vgpr14 + ; GFX90A-NEXT: ; implicit-def: $vgpr15 + ; GFX90A-NEXT: ; implicit-def: $vgpr16 + ; GFX90A-NEXT: ; implicit-def: $vgpr17 + ; GFX90A-NEXT: ; implicit-def: $vgpr18 + ; GFX90A-NEXT: ; implicit-def: $vgpr19 + ; GFX90A-NEXT: ; implicit-def: $vgpr20 + ; GFX90A-NEXT: ; implicit-def: $vgpr21 + ; GFX90A-NEXT: ; implicit-def: $vgpr22 + ; GFX90A-NEXT: ; implicit-def: $vgpr23 + ; GFX90A-NEXT: ; implicit-def: $vgpr24 + ; GFX90A-NEXT: ; implicit-def: $vgpr25 + ; GFX90A-NEXT: ; implicit-def: $vgpr26 + ; GFX90A-NEXT: ; implicit-def: $vgpr27 + ; GFX90A-NEXT: ; implicit-def: $vgpr28 + ; GFX90A-NEXT: ; implicit-def: $vgpr29 + ; GFX90A-NEXT: ; implicit-def: $vgpr30 + ; GFX90A-NEXT: ; implicit-def: $vgpr31 + ; GFX90A-NEXT: ; implicit-def: $vgpr32 + ; GFX90A-NEXT: ; implicit-def: $vgpr33 + ; GFX90A-NEXT: ; implicit-def: $vgpr34 + ; GFX90A-NEXT: ; implicit-def: $vgpr35 + ; GFX90A-NEXT: ; 
implicit-def: $vgpr36 + ; GFX90A-NEXT: ; implicit-def: $vgpr37 + ; GFX90A-NEXT: ; implicit-def: $vgpr38 + ; GFX90A-NEXT: ; implicit-def: $vgpr39 + ; GFX90A-NEXT: ; implicit-def: $vgpr40 + ; GFX90A-NEXT: ; implicit-def: $vgpr41 + ; GFX90A-NEXT: ; implicit-def: $vgpr42 + ; GFX90A-NEXT: ; implicit-def: $vgpr43 + ; GFX90A-NEXT: ; implicit-def: $vgpr44 + ; GFX90A-NEXT: ; implicit-def: $vgpr45 + ; GFX90A-NEXT: ; implicit-def: $vgpr46 + ; GFX90A-NEXT: ; implicit-def: $vgpr47 + ; GFX90A-NEXT: ; implicit-def: $vgpr48 + ; GFX90A-NEXT: ; implicit-def: $vgpr49 + ; GFX90A-NEXT: ; implicit-def: $vgpr50 + ; GFX90A-NEXT: ; implicit-def: $vgpr51 + ; GFX90A-NEXT: ; implicit-def: $vgpr52 + ; GFX90A-NEXT: ; implicit-def: $vgpr53 + ; GFX90A-NEXT: ; implicit-def: $vgpr54 + ; GFX90A-NEXT: ; implicit-def: $vgpr55 + ; GFX90A-NEXT: ; implicit-def: $vgpr56 + ; GFX90A-NEXT: ; implicit-def: $vgpr57 + ; GFX90A-NEXT: ; implicit-def: $vgpr58 + ; GFX90A-NEXT: ; implicit-def: $vgpr59 + ; GFX90A-NEXT: ; implicit-def: $vgpr60 + ; GFX90A-NEXT: ; implicit-def: $vgpr61 + ; GFX90A-NEXT: v_accvgpr_read_b32 v76, a97 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v75, a98 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v74, a99 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v73, a100 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v72, a101 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v71, a102 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v70, a103 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v69, a104 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v68, a105 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v67, a106 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v66, a107 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v65, a108 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v64, a109 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a110 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v62, a111 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v93, 
a96 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v92, a97 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v91, a98 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v90, a99 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v89, a100 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v88, a101 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v87, a102 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v86, a103 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v85, a104 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v84, a105 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v83, a106 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v82, a107 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v81, a108 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v80, a109 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v79, a110 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v78, a111 ; Reload Reuse + ret void + } + + attributes #0 = { "amdgpu-waves-per-eu"="4,4" } + +... 
+---
+name:            test_spill # defines v0-v61, copies a96-a111 to a64-a79, then spills a96-a111 twice
+tracksRegLiveness: true
+stack: # one 64-byte spill slot per SI_SPILL_AV512_SAVE in the body
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 4 }
+  - { id: 1, name: '', type: spill-slot, offset: 0, size: 64, alignment: 4 }
+
+machineFunctionInfo:
+  scratchRSrcReg:    $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: '$sgpr32'
+  hasSpilledVGPRs: true
+body:             |
+  bb.0:
+    $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr7 = IMPLICIT_DEF
+    $vgpr8 = IMPLICIT_DEF
+    $vgpr9 = IMPLICIT_DEF
+    $vgpr10 = IMPLICIT_DEF
+    $vgpr11 = IMPLICIT_DEF
+    $vgpr12 = IMPLICIT_DEF
+    $vgpr13 = IMPLICIT_DEF
+    $vgpr14 = IMPLICIT_DEF
+    $vgpr15 = IMPLICIT_DEF
+    $vgpr16 = IMPLICIT_DEF
+    $vgpr17 = IMPLICIT_DEF
+    $vgpr18 = IMPLICIT_DEF
+    $vgpr19 = IMPLICIT_DEF
+    $vgpr20 = IMPLICIT_DEF
+    $vgpr21 = IMPLICIT_DEF
+    $vgpr22 = IMPLICIT_DEF
+    $vgpr23 = IMPLICIT_DEF
+    $vgpr24 = IMPLICIT_DEF
+    $vgpr25 = IMPLICIT_DEF
+    $vgpr26 = IMPLICIT_DEF
+    $vgpr27 = IMPLICIT_DEF
+    $vgpr28 = IMPLICIT_DEF
+    $vgpr29 = IMPLICIT_DEF
+    $vgpr30 = IMPLICIT_DEF
+    $vgpr31 = IMPLICIT_DEF
+    $vgpr32 = IMPLICIT_DEF
+    $vgpr33 = IMPLICIT_DEF
+    $vgpr34 = IMPLICIT_DEF
+    $vgpr35 = IMPLICIT_DEF
+    $vgpr36 = IMPLICIT_DEF
+    $vgpr37 = IMPLICIT_DEF
+    $vgpr38 = IMPLICIT_DEF
+    $vgpr39 = IMPLICIT_DEF
+    $vgpr40 = IMPLICIT_DEF
+    $vgpr41 = IMPLICIT_DEF
+    $vgpr42 = IMPLICIT_DEF
+    $vgpr43 = IMPLICIT_DEF
+    $vgpr44 = IMPLICIT_DEF
+    $vgpr45 = IMPLICIT_DEF
+    $vgpr46 = IMPLICIT_DEF
+    $vgpr47 = IMPLICIT_DEF
+    $vgpr48 = IMPLICIT_DEF
+    $vgpr49 = IMPLICIT_DEF
+    $vgpr50 = IMPLICIT_DEF
+    $vgpr51 = IMPLICIT_DEF
+    $vgpr52 = IMPLICIT_DEF
+    $vgpr53 = IMPLICIT_DEF
+    $vgpr54 = IMPLICIT_DEF
+    $vgpr55 = IMPLICIT_DEF
+    $vgpr56 = IMPLICIT_DEF
+    $vgpr57 = IMPLICIT_DEF
+    $vgpr58 = IMPLICIT_DEF
+    $vgpr59 = IMPLICIT_DEF
+    $vgpr60 = IMPLICIT_DEF
+    $vgpr61 = IMPLICIT_DEF
+    ; Copy a96-a111 into a64-a79, then spill a96-a111 once to %stack.0 and once to %stack.1; each memory operand names the slot its spill writes.
+    $agpr64_agpr65_agpr66_agpr67_agpr68_agpr69_agpr70_agpr71_agpr72_agpr73_agpr74_agpr75_agpr76_agpr77_agpr78_agpr79 = COPY $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111, implicit $exec
+    SI_SPILL_AV512_SAVE killed $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
+    SI_SPILL_AV512_SAVE $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111, %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5)
+...