diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1167,6 +1167,8 @@ } } + MRI.freezeReservedRegs(MF); + // Stack slot coloring may assign different objects to the same stack slot. // If not, then the VGPR to AGPR spill slot is dead. for (unsigned FI : SpillFIs.set_bits()) diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir --- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir @@ -269,10 +269,10 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -762,11 +762,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -1286,11 +1286,11 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -1782,11 +1782,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -2308,12 +2308,12 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 8, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 8, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -2807,11 +2807,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -3329,10 +3329,10 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -3822,11 +3822,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD $agpr0, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD $agpr0, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -4345,11 +4345,11 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -4841,11 +4841,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -5365,12 +5365,12 @@ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -5864,11 +5864,11 @@ ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -13,133 +13,133 @@ ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def v[0:31] a[0:15] ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a15 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a31, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a14 +; GFX908-NEXT: v_accvgpr_write_b32 a31, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a14 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a30, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a13 +; GFX908-NEXT: v_accvgpr_write_b32 a30, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a13 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a29, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a12 +; GFX908-NEXT: v_accvgpr_write_b32 a29, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a12 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a28, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a11 +; GFX908-NEXT: v_accvgpr_write_b32 a28, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a11 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a27, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a10 +; GFX908-NEXT: v_accvgpr_write_b32 a27, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a10 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a26, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a9 +; GFX908-NEXT: v_accvgpr_write_b32 a26, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a9 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a25, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a8 +; GFX908-NEXT: v_accvgpr_write_b32 a25, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a8 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a24, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a7 +; GFX908-NEXT: v_accvgpr_write_b32 a24, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a7 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a23, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a6 +; GFX908-NEXT: v_accvgpr_write_b32 a23, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a6 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a22, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a5 +; GFX908-NEXT: v_accvgpr_write_b32 a22, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a5 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a21, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a4 +; GFX908-NEXT: v_accvgpr_write_b32 a21, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a4 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a20, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a3 +; GFX908-NEXT: v_accvgpr_write_b32 a20, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a3 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a19, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a2 +; GFX908-NEXT: v_accvgpr_write_b32 a19, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a18, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 +; GFX908-NEXT: v_accvgpr_write_b32 a18, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a17, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a0 +; GFX908-NEXT: v_accvgpr_write_b32 a17, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a0 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a16, v34 +; GFX908-NEXT: v_accvgpr_write_b32 a16, v39 ; GFX908-NEXT: s_nop 0 ; GFX908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v33, v32, a[16:31] ; GFX908-NEXT: s_nop 7 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a0 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v39, a0 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v38, a11 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v37, a12 ; Reload Reuse -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v36, a13 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v35, a14 ; Reload Reuse -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a2 ; Reload Reuse -; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a3 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX908-NEXT: s_nop 0 +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a3 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a4 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a4 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a5 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a5 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a6 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a6 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a7 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a7 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a8 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a8 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a9 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a9 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a10 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a10 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a16, v34 -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a16, v39 +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a0, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a0, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a1, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a1, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a2, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a2, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a3, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a3, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a4, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a4, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a5, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a5, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a6, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a6, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a7, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a7, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a8, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a8, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a9, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a9, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a10, v34 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_write_b32 a10, v39 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a11, v38 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a12, v37 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a13, v36 ; Reload Reuse @@ -863,133 +863,133 @@ ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def v[0:31] s[0:15] ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_mov_b32_e32 v34, s15 +; GFX908-NEXT: v_mov_b32_e32 v39, s15 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a31, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s14 +; GFX908-NEXT: v_accvgpr_write_b32 a31, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s14 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a30, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s13 +; GFX908-NEXT: v_accvgpr_write_b32 a30, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s13 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a29, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s12 +; GFX908-NEXT: v_accvgpr_write_b32 a29, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s12 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a28, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s11 +; GFX908-NEXT: v_accvgpr_write_b32 a28, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s11 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a27, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s10 +; GFX908-NEXT: v_accvgpr_write_b32 a27, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s10 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a26, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s9 +; GFX908-NEXT: v_accvgpr_write_b32 a26, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s9 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a25, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s8 +; GFX908-NEXT: v_accvgpr_write_b32 a25, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s8 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a24, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s7 +; GFX908-NEXT: v_accvgpr_write_b32 a24, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s7 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a23, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s6 +; GFX908-NEXT: v_accvgpr_write_b32 a23, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s6 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a22, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s5 +; GFX908-NEXT: v_accvgpr_write_b32 a22, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s5 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a21, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s4 +; GFX908-NEXT: v_accvgpr_write_b32 a21, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s4 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a20, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s3 +; GFX908-NEXT: v_accvgpr_write_b32 a20, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s3 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a19, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s2 +; GFX908-NEXT: v_accvgpr_write_b32 a19, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a18, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s1 +; GFX908-NEXT: v_accvgpr_write_b32 a18, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s1 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a17, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s0 +; GFX908-NEXT: v_accvgpr_write_b32 a17, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s0 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a16, v34 +; GFX908-NEXT: v_accvgpr_write_b32 a16, v39 ; GFX908-NEXT: s_nop 0 ; GFX908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v33, v32, a[16:31] ; GFX908-NEXT: s_nop 7 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a0 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v39, a0 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v38, a11 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v37, a12 ; Reload Reuse -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v36, a13 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v35, a14 ; Reload Reuse -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a2 ; Reload Reuse -; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a3 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX908-NEXT: s_nop 0 +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a3 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a4 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a4 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a5 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a5 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a6 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a6 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a7 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a7 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a8 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a8 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a9 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a9 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a10 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a10 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a32, v34 -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a32, v39 +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a0, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a0, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a1, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a1, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a2, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a2, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a3, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a3, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a4, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a4, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a5, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a5, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a6, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a6, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a7, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a7, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a8, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a8, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a9, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a9, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a10, v34 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_write_b32 a10, v39 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a11, v38 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a12, v37 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a13, v36 ; Reload Reuse