diff --git a/llvm/include/llvm/CodeGen/RegisterScavenging.h b/llvm/include/llvm/CodeGen/RegisterScavenging.h --- a/llvm/include/llvm/CodeGen/RegisterScavenging.h +++ b/llvm/include/llvm/CodeGen/RegisterScavenging.h @@ -223,6 +223,7 @@ /// No more than InstrLimit instructions are inspected. Register findSurvivorReg(MachineBasicBlock::iterator StartMI, BitVector &Candidates, + ArrayRef<MCPhysReg> AllocationOrder, unsigned InstrLimit, MachineBasicBlock::iterator &UseMI); diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp --- a/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -272,9 +272,18 @@ Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, BitVector &Candidates, + ArrayRef<MCPhysReg> AllocationOrder, unsigned InstrLimit, MachineBasicBlock::iterator &UseMI) { - int Survivor = Candidates.find_first(); + auto FindFirstCandidate = [&]() -> int { + for (MCPhysReg Reg : AllocationOrder) { + if (Candidates.test(Reg)) + return Reg; + } + return -1; + }; + + int Survivor = FindFirstCandidate(); assert(Survivor > 0 && "No candidates for scavenging"); MachineBasicBlock::iterator ME = MBB->getFirstTerminator(); @@ -322,7 +331,7 @@ if (Candidates.none()) break; - Survivor = Candidates.find_first(); + Survivor = FindFirstCandidate(); } // If we ran off the end, that's where we want to restore. if (MI == ME) RestorePointMI = ME; @@ -551,7 +560,8 @@ // Find the register whose use is furthest away. MachineBasicBlock::iterator UseMI; - Register SReg = findSurvivorReg(I, Candidates, 25, UseMI); + Register SReg = + findSurvivorReg(I, Candidates, RC->getRawAllocationOrder(MF), 25, UseMI); // If we found an unused register there is no reason to spill it.
if (!isRegUsed(SReg)) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll @@ -160,11 +160,11 @@ ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 ; FLATSCR-NEXT: v_mov_b32_e32 v1, 0 ; FLATSCR-NEXT: s_mov_b32 s8, 0 +; FLATSCR-NEXT: s_mov_b32 s12, 0 +; FLATSCR-NEXT: s_mov_b32 s11, 0 ; FLATSCR-NEXT: s_mov_b32 s10, 0 ; FLATSCR-NEXT: s_mov_b32 s9, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: s_mov_b32 s11, 0 +; FLATSCR-NEXT: s_mov_b32 s13, 0 ; FLATSCR-NEXT: s_mov_b32 s7, 0 ; FLATSCR-NEXT: s_mov_b32 s5, 0 ; FLATSCR-NEXT: s_mov_b32 s3, 0 @@ -174,11 +174,11 @@ ; FLATSCR-NEXT: s_mov_b32 s4, 0 ; FLATSCR-NEXT: s_mov_b32 s6, 0 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s8 offset:8 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s10 offset:16 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s9 offset:24 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:32 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:40 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s11 offset:48 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s12 offset:16 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s11 offset:24 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s10 offset:32 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s9 offset:40 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s13 offset:48 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s7 offset:56 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s5 offset:64 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s3 offset:72 @@ -188,11 +188,11 @@ ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s4 offset:104 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s6 offset:112 ; FLATSCR-NEXT: 
scratch_store_dwordx2 off, v[0:1], s8 offset:120 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s10 offset:128 -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s9 offset:8 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s12 offset:128 +; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s11 offset:8 ; FLATSCR-NEXT: s_nop 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, vcc_lo offset:16 -; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, vcc_hi offset:24 +; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s10 offset:16 +; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s9 offset:24 ; FLATSCR-NEXT: s_mov_b32 s37, 0 ; FLATSCR-NEXT: scratch_load_dwordx2 v[6:7], off, s37 offset:32 ; FLATSCR-NEXT: s_mov_b32 s36, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -279,8 +279,8 @@ ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc +; GFX9-NEXT: s_mov_b32 s2, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s2 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 @@ -368,8 +368,8 @@ ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: scratch_load_dword v1, off, s0 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX9-NEXT: v_add_u32_e32 v1, 0x104, v1 @@ -459,15 +459,15 @@ ; GFX9-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; 
GFX9-NEXT: s_add_i32 vcc_lo, s32, 0x100 +; GFX9-NEXT: s_add_i32 s1, s32, 0x100 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX9-NEXT: v_add_u32_e32 v1, vcc_lo, v1 +; GFX9-NEXT: v_add_u32_e32 v1, s1, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100 +; GFX9-NEXT: s_add_i32 s0, s32, 0x100 ; GFX9-NEXT: scratch_store_dword v1, v2, off ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_add_u32_e32 v0, vcc_hi, v0 +; GFX9-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX9-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -478,14 +478,14 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: s_add_i32 s1, s32, 0x100 ; GFX10-NEXT: s_add_i32 s0, s32, 0x100 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, s1, v0 ; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_nc_u32_e32 v1, vcc_lo, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v1, s0, v1 ; GFX10-NEXT: scratch_store_dword v0, v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc @@ -498,8 +498,8 @@ ; GFX940-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x100 -; GFX940-NEXT: v_add_u32_e32 v1, vcc_hi, v1 +; GFX940-NEXT: s_add_i32 s0, s32, 0x100 +; GFX940-NEXT: v_add_u32_e32 v1, s0, v1 ; GFX940-NEXT: v_mov_b32_e32 v2, 15 ; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 @@ -515,10 +515,10 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0 ; GFX11-NEXT: v_and_b32_e32 v0, 15, v0 
-; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x100 +; GFX11-NEXT: s_add_i32 s0, s32, 0x100 ; GFX11-NEXT: scratch_load_b32 v3, off, s32 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_add_nc_u32_e32 v1, vcc_lo, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v1, s0, v1 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: scratch_store_b32 v1, v2, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -544,8 +544,8 @@ ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc +; GFX9-NEXT: s_mov_b32 s2, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s2 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 @@ -587,7 +587,6 @@ ; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_lshl_b32 s1, s0, 2 ; GFX940-NEXT: s_and_b32 s0, s0, 15 @@ -596,14 +595,14 @@ ; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: scratch_load_dword v0, v0, vcc_hi sc0 sc1 +; GFX940-NEXT: s_movk_i32 s0, 0x4004 +; GFX940-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_large_offset_kernel: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x24 -; GFX11-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX11-NEXT: scratch_load_b32 v2, off, off offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_and_b32 s1, s0, 15 @@ -614,7 +613,8 @@ ; GFX11-NEXT: s_addk_i32 s0, 0x4004 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: scratch_load_b32 v0, v1, vcc_lo glc 
dlc +; GFX11-NEXT: s_movk_i32 s0, 0x4004 +; GFX11-NEXT: scratch_load_b32 v0, v1, s0 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_endpgm bb: @@ -635,8 +635,8 @@ ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: scratch_load_dword v1, off, s0 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX9-NEXT: v_add_u32_e32 v1, 0x4004, v1 @@ -679,10 +679,10 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX940-NEXT: v_mov_b32_e32 v3, 15 -; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004 +; GFX940-NEXT: s_movk_i32 s0, 0x4004 ; GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 ; GFX940-NEXT: v_mov_b32_e32 v2, 0x4004 -; GFX940-NEXT: scratch_store_dword v1, v3, vcc_hi sc0 sc1 +; GFX940-NEXT: scratch_store_dword v1, v3, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX940-NEXT: v_mov_b32_e32 v1, 0x7c @@ -695,13 +695,13 @@ ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0, v0 ; GFX11-NEXT: v_dual_mov_b32 v3, 15 :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX11-NEXT: s_movk_i32 s0, 0x4004 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v2, 0x7c :: v_dual_lshlrev_b32 v1, 2, v1 ; GFX11-NEXT: v_add3_u32 v1, 0x4004, v1, v2 ; GFX11-NEXT: scratch_load_b32 v2, off, off offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 v0, v3, vcc_lo dlc +; GFX11-NEXT: scratch_store_b32 v0, v3, s0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -728,15 +728,15 @@ ; GFX9-NEXT: scratch_load_dword v1, off, s32 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: 
v_lshlrev_b32_e32 v1, 2, v0 -; GFX9-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX9-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX9-NEXT: v_add_u32_e32 v1, vcc_lo, v1 +; GFX9-NEXT: v_add_u32_e32 v1, s1, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004 +; GFX9-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX9-NEXT: scratch_store_dword v1, v2, off ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_add_u32_e32 v0, vcc_hi, v0 +; GFX9-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX9-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -747,14 +747,14 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, s1, v0 ; GFX10-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_nc_u32_e32 v1, vcc_lo, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v1, s0, v1 ; GFX10-NEXT: scratch_store_dword v0, v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc @@ -767,15 +767,15 @@ ; GFX940-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX940-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX940-NEXT: v_add_u32_e32 v1, vcc_lo, v1 +; GFX940-NEXT: s_add_i32 s1, s32, 0x4004 +; GFX940-NEXT: v_add_u32_e32 v1, s1, v1 ; GFX940-NEXT: v_mov_b32_e32 v2, 15 ; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: s_add_i32 vcc_hi, 
s32, 0x4004 -; GFX940-NEXT: scratch_load_dword v0, v0, vcc_hi sc0 sc1 +; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX940-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] ; @@ -785,16 +785,16 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0 ; GFX11-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_add_nc_u32_e32 v1, s0, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v1, s1, v1 ; GFX11-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: scratch_store_b32 v1, v2, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: scratch_load_b32 v0, v0, vcc_lo glc dlc +; GFX11-NEXT: scratch_load_b32 v0, v0, s0 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: @@ -816,9 +816,9 @@ ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: s_mov_b32 s1, 0 ; GFX9-NEXT: s_movk_i32 s0, 0x3e80 -; GFX9-NEXT: scratch_store_dword off, v0, vcc_hi offset:4 +; GFX9-NEXT: scratch_store_dword off, v0, s1 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: s_add_i32 s0, s0, 4 @@ -887,11 +887,11 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: s_movk_i32 s0, 0x3e80 -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 4 +; GFX9-NEXT: s_add_i32 s1, s32, 4 ; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: s_add_i32 s0, s0, vcc_hi +; GFX9-NEXT: s_add_i32 s0, s0, s1 ; GFX9-NEXT: scratch_store_dword off, v0, s0 ; GFX9-NEXT: 
s_waitcnt vmcnt(0) ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc @@ -905,8 +905,8 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3e80 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 4 -; GFX10-NEXT: s_add_i32 s0, s0, vcc_lo +; GFX10-NEXT: s_add_i32 s1, s32, 4 +; GFX10-NEXT: s_add_i32 s0, s0, s1 ; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -379,8 +379,8 @@ ; GCN-NEXT: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; MUBUF-NEXT: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x2000{{$}} ; MUBUF-NEXT: buffer_store_dword [[ZERO]], [[OFFSET]], s[0:3], s33 offen{{$}} -; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x2000 -; FLATSCR-NEXT: scratch_store_dword off, [[ZERO]], vcc_hi +; FLATSCR-NEXT: s_add_i32 s1, s33, 0x2000 +; FLATSCR-NEXT: scratch_store_dword off, [[ZERO]], s1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_add_i32 s32, s32, 0xffe80000 ; FLATSCR-NEXT: s_addk_i32 s32, 0xa000 diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -481,26 +481,26 @@ ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; FLATSCR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; FLATSCR-NEXT: v_mov_b32_e32 v2, 0 +; FLATSCR-NEXT: s_mov_b32 s6, 0 ; FLATSCR-NEXT: s_mov_b32 s5, 0 ; FLATSCR-NEXT: s_mov_b32 s4, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 ; FLATSCR-NEXT: s_waitcnt lgkmcnt(0) ; FLATSCR-NEXT: global_load_ushort v0, v2, s[0:1] -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_short off, v0, s5 offset:4 +; FLATSCR-NEXT: scratch_store_short off, v0, s6 offset:4 ; 
FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: global_load_ushort v0, v2, s[0:1] offset:2 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_short off, v0, s4 offset:6 +; FLATSCR-NEXT: scratch_store_short off, v0, s5 offset:6 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: global_load_ushort v0, v2, s[0:1] offset:4 +; FLATSCR-NEXT: s_mov_b32 s1, 0 ; FLATSCR-NEXT: s_mov_b32 s0, 0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_short off, v0, s0 offset:8 +; FLATSCR-NEXT: scratch_store_short off, v0, s4 offset:8 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 -; FLATSCR-NEXT: scratch_load_dword v1, off, vcc_hi offset:6 +; FLATSCR-NEXT: scratch_load_dword v0, off, s1 offset:4 +; FLATSCR-NEXT: scratch_load_dword v1, off, s0 offset:6 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; FLATSCR-NEXT: s_endpgm @@ -544,28 +544,28 @@ ; FLATSCR_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; FLATSCR_GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v2, 0 +; FLATSCR_GFX10-NEXT: s_mov_b32 s6, 0 ; FLATSCR_GFX10-NEXT: s_mov_b32 s5, 0 ; FLATSCR_GFX10-NEXT: s_mov_b32 s4, 0 -; FLATSCR_GFX10-NEXT: s_mov_b32 vcc_lo, 0 ; FLATSCR_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; FLATSCR_GFX10-NEXT: global_load_ushort v0, v2, s[0:1] ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) -; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s5 offset:4 +; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s6 offset:4 ; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: global_load_ushort v0, v2, s[0:1] offset:2 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) -; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s4 offset:6 +; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s5 offset:6 ; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: global_load_ushort v0, v2, s[0:1] offset:4 ; FLATSCR_GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; 
FLATSCR_GFX10-NEXT: s_mov_b32 s1, 0 ; FLATSCR_GFX10-NEXT: s_mov_b32 s0, 0 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) -; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s1 offset:8 +; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s4 offset:8 ; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: s_clause 0x1 -; FLATSCR_GFX10-NEXT: scratch_load_dword v0, off, s0 offset:4 -; FLATSCR_GFX10-NEXT: scratch_load_dword v1, off, vcc_lo offset:6 +; FLATSCR_GFX10-NEXT: scratch_load_dword v0, off, s1 offset:4 +; FLATSCR_GFX10-NEXT: scratch_load_dword v1, off, s0 offset:6 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) ; FLATSCR_GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; FLATSCR_GFX10-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll @@ -42,16 +42,16 @@ ; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 ; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 ; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v0, 0 -; FLAT_SCR_OPT-NEXT: s_mov_b32 vcc_lo, 0 -; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v0, vcc_lo offset:4 +; FLAT_SCR_OPT-NEXT: s_mov_b32 s0, 0 +; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v0, s0 offset:4 ; FLAT_SCR_OPT-NEXT: s_waitcnt_vscnt null, 0x0 ; FLAT_SCR_OPT-NEXT: s_endpgm ; ; FLAT_SCR_ARCH-LABEL: stack_object_in_kernel_no_calls: ; FLAT_SCR_ARCH: ; %bb.0: ; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v0, 0 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 vcc_lo, 0 -; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v0, vcc_lo offset:4 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s0, 0 +; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v0, s0 offset:4 ; FLAT_SCR_ARCH-NEXT: s_waitcnt_vscnt null, 0x0 ; FLAT_SCR_ARCH-NEXT: s_endpgm %alloca = alloca i32, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll --- 
a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -21,13 +21,13 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NEXT: s_mov_b32 s3, 0 +; GFX9-NEXT: s_mov_b32 s2, 0 ; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:52 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:36 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:4 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:52 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:36 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:20 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:4 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: zero_init_kernel: @@ -73,8 +73,6 @@ ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 @@ -86,11 +84,13 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-PAL-NEXT: s_mov_b32 s3, 0 +; GFX9-PAL-NEXT: s_mov_b32 s2, 0 ; GFX9-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:52 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:36 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:4 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:52 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:36 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:20 +; GFX9-PAL-NEXT: 
scratch_store_dwordx4 off, v[0:3], s0 offset:4 ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: zero_init_kernel: @@ -119,7 +119,6 @@ ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 ; GFX1010-PAL-NEXT: s_mov_b32 s1, s0 ; GFX1010-PAL-NEXT: s_mov_b32 s2, s0 ; GFX1010-PAL-NEXT: s_mov_b32 s3, s0 @@ -127,12 +126,13 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX1010-PAL-NEXT: s_mov_b32 s3, 0 ; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 ; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:52 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:36 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:20 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:4 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:52 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:36 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:20 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:4 ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: zero_init_kernel: @@ -903,13 +903,13 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NEXT: s_mov_b32 s3, 0 +; GFX9-NEXT: s_mov_b32 s2, 0 ; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:260 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:276 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:308 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:260 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:276 +; 
GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:292 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:308 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: zero_init_small_offset_kernel: @@ -960,8 +960,6 @@ ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-PAL-NEXT: s_mov_b32 s4, 0 ; GFX9-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 @@ -975,11 +973,13 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-PAL-NEXT: s_mov_b32 s3, 0 +; GFX9-PAL-NEXT: s_mov_b32 s2, 0 ; GFX9-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:260 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:276 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:308 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:260 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:276 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:292 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:308 ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: zero_init_small_offset_kernel: @@ -1020,13 +1020,13 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX1010-PAL-NEXT: s_mov_b32 s3, 0 ; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 ; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 -; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:260 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:276 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:292 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:308 +; GFX1010-PAL-NEXT: 
scratch_store_dwordx4 off, v[0:3], s3 offset:260 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:276 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:292 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:308 ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: zero_init_small_offset_kernel: @@ -1238,8 +1238,8 @@ ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc +; GFX9-NEXT: s_mov_b32 s2, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s2 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 @@ -1298,13 +1298,13 @@ ; GFX9-PAL-NEXT: s_getpc_b64 s[4:5] ; GFX9-PAL-NEXT: s_mov_b32 s4, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-PAL-NEXT: s_mov_b32 s2, 0 ; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s5, 0 -; GFX9-PAL-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc +; GFX9-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15 @@ -1348,8 +1348,8 @@ ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 ; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 glc dlc +; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX1010-PAL-NEXT: 
s_waitcnt lgkmcnt(0) @@ -1426,8 +1426,8 @@ ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc +; GFX9-NEXT: s_mov_b32 s1, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s1 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_lshl_b32 s0, s2, 2 ; GFX9-NEXT: s_addk_i32 s0, 0x104 @@ -1482,12 +1482,12 @@ ; GFX9-PAL-NEXT: s_getpc_b64 s[2:3] ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc +; GFX9-PAL-NEXT: s_mov_b32 s2, 0 +; GFX9-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15 @@ -1528,9 +1528,9 @@ ; GFX1010-PAL-NEXT: s_addc_u32 s3, s3, 0 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 +; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 ; GFX1010-PAL-NEXT: s_and_b32 s1, s0, 15 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 glc dlc +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2 @@ -1601,8 +1601,8 @@ ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: scratch_load_dword v1, off, s0 offset:4 glc ; GFX9-NEXT: 
s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_add_u32_e32 v1, 0x104, v0 @@ -1653,14 +1653,14 @@ ; GFX9-PAL-NEXT: s_getpc_b64 s[2:3] ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-PAL-NEXT: s_mov_b32 s0, 0 ; GFX9-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc +; GFX9-PAL-NEXT: scratch_load_dword v1, off, s0 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: v_add_u32_e32 v1, 0x104, v0 ; GFX9-PAL-NEXT: v_sub_u32_e32 v0, 0x104, v0 @@ -1698,8 +1698,8 @@ ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX1010-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, 15 -; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX1010-PAL-NEXT: scratch_load_dword v3, off, vcc_lo offset:4 glc dlc +; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 +; GFX1010-PAL-NEXT: scratch_load_dword v3, off, s0 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: v_sub_nc_u32_e32 v1, 0x104, v0 ; GFX1010-PAL-NEXT: v_add_nc_u32_e32 v0, 0x104, v0 @@ -1768,8 +1768,8 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100 -; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi +; GFX9-NEXT: s_add_i32 s0, s32, 0x100 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 @@ -1785,11 +1785,11 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 +; GFX10-NEXT: s_add_i32 s1, s32, 0x100 ; 
GFX10-NEXT: s_add_i32 s0, s32, 0x100 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100 -; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s0 +; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo +; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, s0 ; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: scratch_store_dword v0, v2, off @@ -1803,10 +1803,10 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 -; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x100 +; GFX11-NEXT: s_add_i32 s0, s32, 0x100 ; GFX11-NEXT: scratch_load_b32 v3, off, s32 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshl_add_u32 v0, v0, 2, vcc_lo +; GFX11-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX11-NEXT: scratch_store_b32 v0, v2, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1819,8 +1819,8 @@ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x100 -; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi +; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x100 +; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0 @@ -1836,8 +1836,8 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x100 -; GFX940-NEXT: v_mov_b32_e32 v1, vcc_hi +; GFX940-NEXT: s_add_i32 s0, s32, 0x100 +; GFX940-NEXT: v_mov_b32_e32 v1, s0 ; GFX940-NEXT: v_lshl_add_u32 v1, v0, 2, v1 ; GFX940-NEXT: v_mov_b32_e32 v2, 15 ; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 @@ -1853,11 +1853,11 @@ ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0 +; GFX10-PAL-NEXT: s_add_i32 s1, s32, 0x100 ; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x100 -; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100 -; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 +; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo +; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, s0 ; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX10-PAL-NEXT: scratch_store_dword v0, v2, off @@ -1871,10 +1871,10 @@ ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 -; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100 +; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x100 ; GFX11-PAL-NEXT: scratch_load_b32 v3, off, s32 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, vcc_lo +; GFX11-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX11-PAL-NEXT: scratch_store_b32 v0, v2, off dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1910,14 +1910,14 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NEXT: s_movk_i32 s3, 0x4004 +; GFX9-NEXT: s_movk_i32 s2, 0x4004 ; GFX9-NEXT: s_movk_i32 s1, 0x4004 ; GFX9-NEXT: s_movk_i32 s0, 0x4004 -; GFX9-NEXT: s_movk_i32 vcc_lo, 0x4004 -; GFX9-NEXT: s_movk_i32 vcc_hi, 0x4004 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s3 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 
offset:32 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: zero_init_large_offset_kernel: @@ -1929,7 +1929,6 @@ ; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX10-NEXT: s_mov_b32 s1, s0 ; GFX10-NEXT: s_mov_b32 s2, s0 ; GFX10-NEXT: s_mov_b32 s3, s0 @@ -1937,13 +1936,14 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 +; GFX10-NEXT: s_movk_i32 s3, 0x4004 ; GFX10-NEXT: s_movk_i32 s2, 0x4004 ; GFX10-NEXT: s_movk_i32 s1, 0x4004 ; GFX10-NEXT: s_movk_i32 s0, 0x4004 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s3 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: zero_init_large_offset_kernel: @@ -1951,20 +1951,21 @@ ; GFX11-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s1, s0 ; GFX11-NEXT: s_mov_b32 s2, s0 ; GFX11-NEXT: s_mov_b32 s3, s0 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: s_movk_i32 s3, 0x4004 ; GFX11-NEXT: s_movk_i32 s2, 0x4004 ; GFX11-NEXT: s_movk_i32 s1, 0x4004 ; GFX11-NEXT: s_movk_i32 s0, 0x4004 ; GFX11-NEXT: s_clause 0x3 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 offset:16 -; 
GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:48 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s3 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 offset:16 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 offset:32 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; @@ -1975,8 +1976,6 @@ ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-PAL-NEXT: s_mov_b32 s4, 0 ; GFX9-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 -; GFX9-PAL-NEXT: s_movk_i32 vcc_hi, 0x4004 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 @@ -1990,12 +1989,14 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-PAL-NEXT: s_movk_i32 s3, 0x4004 +; GFX9-PAL-NEXT: s_movk_i32 s2, 0x4004 ; GFX9-PAL-NEXT: s_movk_i32 s1, 0x4004 ; GFX9-PAL-NEXT: s_movk_i32 s0, 0x4004 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: zero_init_large_offset_kernel: @@ -2008,14 +2009,14 @@ ; GFX940-NEXT: s_mov_b32 s3, s0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX940-NEXT: s_movk_i32 s3, 0x4004 +; GFX940-NEXT: s_movk_i32 s2, 0x4004 ; GFX940-NEXT: s_movk_i32 s1, 0x4004 ; GFX940-NEXT: s_movk_i32 s0, 0x4004 -; GFX940-NEXT: s_movk_i32 
vcc_lo, 0x4004 -; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s1 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 sc0 sc1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s3 sc0 sc1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 sc0 sc1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 sc0 sc1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 sc0 sc1 ; GFX940-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: zero_init_large_offset_kernel: @@ -2040,14 +2041,14 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX1010-PAL-NEXT: s_movk_i32 s3, 0x4004 ; GFX1010-PAL-NEXT: s_movk_i32 s2, 0x4004 ; GFX1010-PAL-NEXT: s_movk_i32 s1, 0x4004 ; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x4004 -; GFX1010-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: zero_init_large_offset_kernel: @@ -2064,7 +2065,6 @@ ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1030-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1030-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX1030-PAL-NEXT: s_mov_b32 s1, s0 ; GFX1030-PAL-NEXT: s_mov_b32 s2, s0 ; GFX1030-PAL-NEXT: s_mov_b32 
s3, s0 @@ -2072,13 +2072,14 @@ ; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX1030-PAL-NEXT: s_movk_i32 s3, 0x4004 ; GFX1030-PAL-NEXT: s_movk_i32 s2, 0x4004 ; GFX1030-PAL-NEXT: s_movk_i32 s1, 0x4004 ; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x4004 -; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 -; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 -; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX1030-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: zero_init_large_offset_kernel: @@ -2086,20 +2087,21 @@ ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_mov_b32 s0, 0 -; GFX11-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-PAL-NEXT: s_mov_b32 s1, s0 ; GFX11-PAL-NEXT: s_mov_b32 s2, s0 ; GFX11-PAL-NEXT: s_mov_b32 s3, s0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-PAL-NEXT: s_movk_i32 s3, 0x4004 ; GFX11-PAL-NEXT: s_movk_i32 s2, 0x4004 ; GFX11-PAL-NEXT: s_movk_i32 s1, 0x4004 ; GFX11-PAL-NEXT: s_movk_i32 s0, 0x4004 ; GFX11-PAL-NEXT: s_clause 0x3 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s2 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s1 offset:16 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:48 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s3 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s2 offset:16 +; 
GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s1 offset:32 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48 ; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-PAL-NEXT: s_endpgm %padding = alloca [4096 x i32], align 4, addrspace(5) @@ -2124,14 +2126,14 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NEXT: s_add_i32 s3, s32, 0x4004 +; GFX9-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX9-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX9-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX9-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s3 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2149,14 +2151,14 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 +; GFX10-NEXT: s_add_i32 s3, s32, 0x4004 ; GFX10-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX10-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s3 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 
offset:48 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -2173,15 +2175,15 @@ ; GFX11-NEXT: s_mov_b32 s3, s0 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: s_add_i32 s3, s32, 0x4004 ; GFX11-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX11-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX11-NEXT: s_clause 0x3 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:48 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s3 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 offset:16 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 offset:32 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2198,14 +2200,14 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-PAL-NEXT: s_add_i32 s3, s32, 0x4004 +; GFX9-PAL-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX9-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX9-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; 
GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; @@ -2220,14 +2222,14 @@ ; GFX940-NEXT: s_mov_b32 s3, s0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX940-NEXT: s_add_i32 s3, s32, 0x4004 +; GFX940-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX940-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX940-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s1 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 sc0 sc1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s3 sc0 sc1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 sc0 sc1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 sc0 sc1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] ; @@ -2245,14 +2247,14 @@ ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX10-PAL-NEXT: s_add_i32 s3, s32, 0x4004 ; GFX10-PAL-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX10-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 +; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 +; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 +; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX10-PAL-NEXT: s_waitcnt_vscnt 
null, 0x0 ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] ; @@ -2269,15 +2271,15 @@ ; GFX11-PAL-NEXT: s_mov_b32 s3, s0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-PAL-NEXT: s_add_i32 s3, s32, 0x4004 ; GFX11-PAL-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX11-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX11-PAL-NEXT: s_clause 0x3 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s2 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s1 offset:16 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:48 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s3 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s2 offset:16 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s1 offset:32 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48 ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] %padding = alloca [4096 x i32], align 4, addrspace(5) @@ -2294,8 +2296,8 @@ ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc +; GFX9-NEXT: s_mov_b32 s2, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s2 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 @@ -2354,13 +2356,13 @@ ; GFX9-PAL-NEXT: s_getpc_b64 s[4:5] ; GFX9-PAL-NEXT: s_mov_b32 s4, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-PAL-NEXT: s_mov_b32 s2, 0 ; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 ; GFX9-PAL-NEXT: 
s_addc_u32 flat_scratch_hi, s5, 0 -; GFX9-PAL-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc +; GFX9-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15 @@ -2404,8 +2406,8 @@ ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 ; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 glc dlc +; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX1010-PAL-NEXT: s_waitcnt lgkmcnt(0) @@ -2482,8 +2484,8 @@ ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc +; GFX9-NEXT: s_mov_b32 s1, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s1 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_lshl_b32 s0, s2, 2 ; GFX9-NEXT: s_addk_i32 s0, 0x4004 @@ -2538,12 +2540,12 @@ ; GFX9-PAL-NEXT: s_getpc_b64 s[2:3] ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc +; GFX9-PAL-NEXT: s_mov_b32 s2, 0 +; GFX9-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15 @@ -2584,9 +2586,9 @@ ; GFX1010-PAL-NEXT: s_addc_u32 s3, s3, 0 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), 
s2 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 +; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 ; GFX1010-PAL-NEXT: s_and_b32 s1, s0, 15 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 glc dlc +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2 @@ -2657,8 +2659,8 @@ ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: scratch_load_dword v1, off, s0 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_add_u32_e32 v1, 0x4004, v0 @@ -2693,13 +2695,13 @@ ; GFX11-LABEL: store_load_vindex_large_offset_kernel: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX11-NEXT: s_movk_i32 s0, 0x4004 ; GFX11-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0x4004, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_add_nc_u32 v1, 0x7c, v1 -; GFX11-NEXT: scratch_store_b32 v0, v2, vcc_lo dlc +; GFX11-NEXT: scratch_store_b32 v0, v2, s0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -2710,14 +2712,14 @@ ; GFX9-PAL-NEXT: s_getpc_b64 s[2:3] ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-PAL-NEXT: s_mov_b32 s0, 0 ; GFX9-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 
flat_scratch_lo, s2, s1 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc +; GFX9-PAL-NEXT: scratch_load_dword v1, off, s0 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: v_add_u32_e32 v1, 0x4004, v0 ; GFX9-PAL-NEXT: v_sub_u32_e32 v0, 0x4004, v0 @@ -2734,8 +2736,8 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004 -; GFX940-NEXT: scratch_store_dword v0, v1, vcc_hi sc0 sc1 +; GFX940-NEXT: s_movk_i32 s0, 0x4004 +; GFX940-NEXT: scratch_store_dword v0, v1, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_sub_u32_e32 v0, 0x4004, v0 ; GFX940-NEXT: v_add_u32_e32 v0, 0x7c, v0 @@ -2756,8 +2758,8 @@ ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX1010-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, 15 -; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX1010-PAL-NEXT: scratch_load_dword v3, off, vcc_lo offset:4 glc dlc +; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 +; GFX1010-PAL-NEXT: scratch_load_dword v3, off, s0 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: v_sub_nc_u32_e32 v1, 0x4004, v0 ; GFX1010-PAL-NEXT: v_add_nc_u32_e32 v0, 0x4004, v0 @@ -2795,13 +2797,13 @@ ; GFX11-PAL-LABEL: store_load_vindex_large_offset_kernel: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX11-PAL-NEXT: s_movk_i32 s0, 0x4004 ; GFX11-PAL-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: v_sub_nc_u32_e32 v1, 0x4004, v0 ; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_add_nc_u32 v1, 0x7c, v1 -; GFX11-PAL-NEXT: scratch_store_b32 v0, v2, vcc_lo dlc +; GFX11-PAL-NEXT: scratch_store_b32 v0, v2, s0 dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; 
GFX11-PAL-NEXT: scratch_load_b32 v0, v1, off glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) @@ -2827,8 +2829,8 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: scratch_load_dword v1, off, s32 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi +; GFX9-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 @@ -2844,11 +2846,11 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 +; GFX10-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s0 +; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo +; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, s0 ; GFX10-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: scratch_store_dword v0, v2, off @@ -2862,16 +2864,16 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX11-NEXT: v_lshl_add_u32 v0, v0, 2, s0 +; GFX11-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX11-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: scratch_store_b32 v0, v2, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: scratch_load_b32 v0, v1, vcc_lo glc dlc +; GFX11-NEXT: scratch_load_b32 v0, v1, s0 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 
s[30:31] ; @@ -2880,8 +2882,8 @@ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi +; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0 @@ -2897,16 +2899,16 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX940-NEXT: v_mov_b32_e32 v1, vcc_lo +; GFX940-NEXT: s_add_i32 s1, s32, 0x4004 +; GFX940-NEXT: v_mov_b32_e32 v1, s1 ; GFX940-NEXT: v_lshl_add_u32 v1, v0, 2, v1 ; GFX940-NEXT: v_mov_b32_e32 v2, 15 ; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX940-NEXT: scratch_load_dword v0, v0, vcc_hi sc0 sc1 +; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX940-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] ; @@ -2915,11 +2917,11 @@ ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0 +; GFX10-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 +; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo +; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, s0 ; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc ; GFX10-PAL-NEXT: s_waitcnt 
vmcnt(0) ; GFX10-PAL-NEXT: scratch_store_dword v0, v2, off @@ -2933,16 +2935,16 @@ ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 +; GFX11-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX11-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 +; GFX11-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX11-PAL-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: scratch_store_b32 v0, v2, off dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, vcc_lo glc dlc +; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, s0 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] bb: @@ -2964,9 +2966,9 @@ ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: s_mov_b32 s1, 0 ; GFX9-NEXT: s_movk_i32 s0, 0x3000 -; GFX9-NEXT: scratch_store_dword off, v0, vcc_hi offset:4 +; GFX9-NEXT: scratch_store_dword off, v0, s1 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 @@ -3014,13 +3016,13 @@ ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13 -; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: scratch_store_dword off, v0, vcc_hi offset:4 +; GFX9-PAL-NEXT: s_mov_b32 s1, 0 +; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1 
offset:4 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 @@ -3058,9 +3060,9 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3800 -; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 +; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 ; GFX1010-PAL-NEXT: s_add_i32 s0, s0, 4 -; GFX1010-PAL-NEXT: scratch_store_dword off, v0, vcc_lo offset:4 +; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s1 offset:4 ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3121,10 +3123,10 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: s_movk_i32 s0, 0x3000 -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 4 +; GFX9-NEXT: s_add_i32 s1, s32, 4 ; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 s0, s0, vcc_hi +; GFX9-NEXT: s_add_i32 s0, s0, s1 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -3139,8 +3141,8 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3800 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 4 -; GFX10-NEXT: s_add_i32 s0, s0, vcc_lo +; GFX10-NEXT: s_add_i32 s1, s32, 4 +; GFX10-NEXT: s_add_i32 s0, s0, s1 ; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -3155,9 +3157,9 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 ; GFX11-NEXT: s_movk_i32 s0, 0x3000 -; GFX11-NEXT: s_add_i32 vcc_lo, s32, 4 +; GFX11-NEXT: s_add_i32 s1, s32, 4 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_add_i32 s0, s0, vcc_lo +; GFX11-NEXT: s_add_i32 s0, s0, s1 ; GFX11-NEXT: scratch_store_b32 
off, v0, s32 offset:4 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_store_b32 off, v1, s0 offset:3712 dlc @@ -3171,10 +3173,10 @@ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000 -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 4 +; GFX9-PAL-NEXT: s_add_i32 s1, s32, 4 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 s0, s0, vcc_hi +; GFX9-PAL-NEXT: s_add_i32 s0, s0, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) @@ -3187,10 +3189,10 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v0, 13 ; GFX940-NEXT: s_movk_i32 s0, 0x3000 -; GFX940-NEXT: s_add_i32 vcc_hi, s32, 4 +; GFX940-NEXT: s_add_i32 s1, s32, 4 ; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_add_i32 s0, s0, vcc_hi +; GFX940-NEXT: s_add_i32 s0, s0, s1 ; GFX940-NEXT: v_mov_b32_e32 v0, 15 ; GFX940-NEXT: scratch_store_dword off, v0, s0 offset:3712 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) @@ -3205,8 +3207,8 @@ ; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800 -; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 4 -; GFX10-PAL-NEXT: s_add_i32 s0, s0, vcc_lo +; GFX10-PAL-NEXT: s_add_i32 s1, s32, 4 +; GFX10-PAL-NEXT: s_add_i32 s0, s0, s1 ; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -3221,9 +3223,9 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 ; GFX11-PAL-NEXT: s_movk_i32 s0, 0x3000 -; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 4 +; GFX11-PAL-NEXT: s_add_i32 s1, s32, 4 ; GFX11-PAL-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) -; GFX11-PAL-NEXT: s_add_i32 s0, s0, vcc_lo +; GFX11-PAL-NEXT: s_add_i32 s0, s0, s1 ; GFX11-PAL-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_store_b32 off, v1, s0 offset:3712 dlc @@ -3917,11 +3919,11 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:3024 +; GFX9-NEXT: s_mov_b32 s1, 0 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:3024 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_load_dwordx4 v[0:3], off, vcc_hi offset:3024 glc +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: scratch_load_dwordx4 v[0:3], off, s0 offset:3024 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 16 ; GFX9-NEXT: ;;#ASMSTART @@ -3992,11 +3994,11 @@ ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:3024 +; GFX9-PAL-NEXT: s_mov_b32 s1, 0 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:3024 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-PAL-NEXT: scratch_load_dwordx4 v[0:3], off, vcc_hi offset:3024 glc +; GFX9-PAL-NEXT: s_mov_b32 s0, 0 +; GFX9-PAL-NEXT: scratch_load_dwordx4 v[0:3], off, s0 offset:3024 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 16 ; GFX9-PAL-NEXT: ;;#ASMSTART diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir --- a/llvm/test/CodeGen/AMDGPU/frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir @@ -53,8 +53,8 @@ ; GCN-LABEL: name: func_add_constant_to_fi_uniform_i32 ; GCN: liveins: $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $vcc_hi = 
S_LSHR_B32 6, $sgpr32, implicit-def dead $scc - ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 killed $vcc_hi, 4, implicit-def dead $scc + ; GCN-NEXT: $sgpr0 = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc + ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 killed $sgpr0, 4, implicit-def dead $scc ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr4, implicit $exec ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: DS_WRITE_B32 undef renamable $vgpr0, killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec @@ -89,12 +89,12 @@ ; GCN-LABEL: name: func_add_constant_to_fi_uniform_SCC_clobber_i32 ; GCN: liveins: $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $vcc_lo = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc - ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $vcc_lo, 4, implicit-def $scc + ; GCN-NEXT: $sgpr1 = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc + ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $sgpr1, 4, implicit-def $scc ; GCN-NEXT: renamable $sgpr5 = S_ADDC_U32 $sgpr4, 1234567, implicit-def $scc, implicit $scc - ; GCN-NEXT: $vcc_hi = S_LSHR_B32 $sgpr32, 6, implicit-def $scc - ; GCN-NEXT: $vcc_hi = S_ADD_I32 killed $vcc_hi, 8, implicit-def $scc - ; GCN-NEXT: renamable $sgpr6 = S_MUL_I32 killed $vcc_hi, $sgpr5 + ; GCN-NEXT: $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc + ; GCN-NEXT: $sgpr0 = S_ADD_I32 killed $sgpr0, 8, implicit-def $scc + ; GCN-NEXT: renamable $sgpr6 = S_MUL_I32 killed $sgpr0, $sgpr5 ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr6, implicit $exec ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: DS_WRITE_B32 undef renamable $vgpr0, killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec @@ -132,9 +132,9 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 7, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec - ; GCN-NEXT: $vcc_hi = S_LSHR_B32 $sgpr32, 6, implicit-def $scc - ; GCN-NEXT: $vcc_hi = S_ADD_I32 killed $vcc_hi, 
512, implicit-def $scc - ; GCN-NEXT: renamable $sgpr4 = S_MUL_I32 killed $vcc_hi, 9 + ; GCN-NEXT: $sgpr5 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc + ; GCN-NEXT: $sgpr5 = S_ADD_I32 killed $sgpr5, 512, implicit-def $scc + ; GCN-NEXT: renamable $sgpr4 = S_MUL_I32 killed $sgpr5, 9 ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr4, implicit $exec ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: DS_WRITE_B32 undef renamable $vgpr0, killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec @@ -168,8 +168,8 @@ ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 $sgpr10, 4, implicit-def $scc ; GCN-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec - ; GCN-NEXT: $vcc_hi = V_READFIRSTLANE_B32 $vgpr1, implicit $exec - ; GCN-NEXT: renamable $sgpr5 = S_MUL_I32 killed $vcc_hi, $sgpr4 + ; GCN-NEXT: $sgpr0 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec + ; GCN-NEXT: renamable $sgpr5 = S_MUL_I32 killed $sgpr0, $sgpr4 ; GCN-NEXT: renamable $sgpr6 = S_ADDC_U32 $sgpr5, 1234567, implicit-def dead $scc, implicit $scc ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr6, implicit $exec ; GCN-NEXT: $m0 = S_MOV_B32 -1 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -6939,12 +6939,12 @@ ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX11-NEXT: s_add_i32 vcc_lo, s33, 8 +; GFX11-NEXT: s_add_i32 s2, s33, 8 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_store_b8 off, v0, s33 ; GFX11-NEXT: scratch_store_b32 off, v1, s33 offset:4 -; GFX11-NEXT: v_dual_mov_b32 v0, vcc_lo :: v_dual_mov_b32 v1, s33 +; GFX11-NEXT: v_dual_mov_b32 v0, 
s2 :: v_dual_mov_b32 v1, s33 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0x1 @@ -6986,11 +6986,11 @@ ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX10-SCRATCH-NEXT: s_add_i32 vcc_lo, s33, 8 +; GFX10-SCRATCH-NEXT: s_add_i32 s2, s33, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, vcc_lo +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s33 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/load-lo16.ll b/llvm/test/CodeGen/AMDGPU/load-lo16.ll --- a/llvm/test/CodeGen/AMDGPU/load-lo16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-lo16.ll @@ -2038,8 +2038,8 @@ ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44 -; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, vcc_hi offset:4054 glc +; GFX900-FLATSCR-NEXT: s_add_i32 s0, s32, 44 +; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s0 offset:4054 glc ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -2106,8 +2106,8 @@ ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44 -; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, vcc_hi offset:4055 glc +; GFX900-FLATSCR-NEXT: 
s_add_i32 s0, s32, 44 +; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s0 offset:4055 glc ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -2175,8 +2175,8 @@ ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44 -; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, vcc_hi offset:4055 glc +; GFX900-FLATSCR-NEXT: s_add_i32 s0, s32, 44 +; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s0 offset:4055 glc ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -2244,8 +2244,8 @@ ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44 -; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, vcc_hi offset:4055 glc +; GFX900-FLATSCR-NEXT: s_add_i32 s0, s32, 44 +; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s0 offset:4055 glc ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -2314,8 +2314,8 @@ ; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b ; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4 ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44 -; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, vcc_hi offset:4055 glc +; GFX900-FLATSCR-NEXT: s_add_i32 s0, s32, 44 +; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s0 offset:4055 glc ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) ; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off ; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) diff --git 
a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -61,9 +61,9 @@ ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 -; FLATSCR-NEXT: s_movk_i32 vcc_hi, 0x2000 +; FLATSCR-NEXT: s_movk_i32 s3, 0x2000 ; FLATSCR-NEXT: s_mov_b32 s2, 0 -; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi +; FLATSCR-NEXT: scratch_store_dword off, v0, s3 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: .LBB0_1: ; %loadstoreloop ; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -155,14 +155,14 @@ ; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000 ; FLATSCR-NEXT: s_add_i32 s32, s32, 0x8000 ; FLATSCR-NEXT: v_mov_b32_e32 v2, 0 -; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x2000 +; FLATSCR-NEXT: s_add_i32 s1, s33, 0x2000 ; FLATSCR-NEXT: s_mov_b32 s0, 0 -; FLATSCR-NEXT: scratch_store_dword off, v2, vcc_hi +; FLATSCR-NEXT: scratch_store_dword off, v2, s1 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: .LBB1_1: ; %loadstoreloop ; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 -; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x3000 -; FLATSCR-NEXT: s_add_i32 s1, s0, vcc_hi +; FLATSCR-NEXT: s_add_i32 s3, s33, 0x3000 +; FLATSCR-NEXT: s_add_i32 s1, s0, s3 ; FLATSCR-NEXT: s_add_i32 s0, s0, 1 ; FLATSCR-NEXT: s_cmpk_lt_u32 s0, 0x2120 ; FLATSCR-NEXT: scratch_store_byte off, v2, s1 @@ -275,9 +275,9 @@ ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: s_mov_b32 s3, 0 ; FLATSCR-NEXT: s_mov_b32 s2, 0 -; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:1024 +; FLATSCR-NEXT: scratch_store_dword off, v0, s3 offset:1024 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; 
FLATSCR-NEXT: .LBB2_1: ; %loadstoreloop ; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir @@ -87,10 +87,10 @@ ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; GFX9-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2 ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc - ; GFX9-FLATSCR-NEXT: $vcc_lo = S_ADD_I32 $sgpr33, 8192, implicit-def $scc - ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec - ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 16384, implicit-def $scc - ; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr9 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc + ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr9, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr8 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc + ; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr8, $vgpr1, implicit $exec ; GFX9-FLATSCR-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0 ; GFX9-FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll --- a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll +++ b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll @@ -34,7 +34,7 @@ ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, 
s3, 0xffff ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 @@ -102,7 +102,7 @@ ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 @@ -155,7 +155,7 @@ ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x10 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 @@ -237,7 +237,7 @@ ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 @@ -293,7 +293,7 @@ ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 @@ -316,7 +316,6 @@ ; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen ; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen -; GFX9PLUS-NOT: s_mov_b32 s5 ; GFX9_10-MUBUF-DAG: 
buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen ; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen @@ -350,7 +349,7 @@ ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 diff --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll --- a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll @@ -26,17 +26,17 @@ ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_lo offset:8 glc +; FLATSCR-NEXT: s_mov_b32 s1, 0 +; FLATSCR-NEXT: scratch_load_dword v0, off, s1 offset:8 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_movk_i32 s0, 0xffc -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload +; FLATSCR-NEXT: s_mov_b32 s0, 0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:8 +; FLATSCR-NEXT: scratch_store_dword off, v0, s0 offset:8 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_endpgm entry: @@ -79,17 +79,17 @@ ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_lo offset:8 glc +; 
FLATSCR-NEXT: s_mov_b32 s1, 0 +; FLATSCR-NEXT: scratch_load_dword v0, off, s1 offset:8 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_movk_i32 s0, 0x1000 -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload +; FLATSCR-NEXT: s_mov_b32 s0, 0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:8 +; FLATSCR-NEXT: scratch_store_dword off, v0, s0 offset:8 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_endpgm entry: @@ -218,10 +218,10 @@ ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: s_mov_b32 s9, 0 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc +; FLATSCR-NEXT: scratch_load_dword v0, off, s9 offset:8 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_movk_i32 s8, 0x1004 ; FLATSCR-NEXT: scratch_store_dword off, v0, s8 ; 4-byte Folded Spill @@ -302,15 +302,15 @@ ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, vcc_lo offset:12 glc +; FLATSCR-NEXT: s_mov_b32 s2, 0 +; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s2 offset:12 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_movk_i32 s0, 0xff8 -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: s_mov_b32 s1, 0 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 ; 8-byte Folded Spill ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc +; FLATSCR-NEXT: scratch_load_dword v0, off, s1 offset:8 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; 
FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload ; FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -369,15 +369,15 @@ ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, vcc_lo offset:12 glc +; FLATSCR-NEXT: s_mov_b32 s2, 0 +; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s2 offset:12 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_movk_i32 s0, 0xffc -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: s_mov_b32 s1, 0 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 ; 8-byte Folded Spill ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc +; FLATSCR-NEXT: scratch_load_dword v0, off, s1 offset:8 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload ; FLATSCR-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -68,9 +68,9 @@ ; FLATSCR-NEXT: s_and_saveexec_b32 s0, vcc_lo ; FLATSCR-NEXT: s_cbranch_execz .LBB0_2 ; FLATSCR-NEXT: ; %bb.1: ; %if.then4.i -; FLATSCR-NEXT: s_movk_i32 vcc_lo, 0x4000 +; FLATSCR-NEXT: s_movk_i32 s1, 0x4000 ; FLATSCR-NEXT: s_mov_b32 s0, 0x41c64e6d -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, vcc_lo offset:4 +; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s1 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: v_add_nc_u32_e32 v0, v1, v0 ; FLATSCR-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039 @@ -95,9 +95,9 @@ ; MUBUF11-NEXT: v_cmpx_ne_u32_e32 0, v0 ; MUBUF11-NEXT: s_cbranch_execz .LBB0_2 ; MUBUF11-NEXT: ; %bb.1: ; %if.then4.i -; 
MUBUF11-NEXT: s_movk_i32 vcc_lo, 0x4000 +; MUBUF11-NEXT: s_movk_i32 s1, 0x4000 ; MUBUF11-NEXT: s_mov_b32 s0, 0x41c64e6d -; MUBUF11-NEXT: scratch_load_b64 v[0:1], off, vcc_lo offset:4 +; MUBUF11-NEXT: scratch_load_b64 v[0:1], off, s1 offset:4 ; MUBUF11-NEXT: s_waitcnt vmcnt(0) ; MUBUF11-NEXT: v_add_nc_u32_e32 v2, v1, v0 ; MUBUF11-NEXT: v_mad_u64_u32 v[0:1], null, v2, s0, 0x3039 @@ -123,9 +123,9 @@ ; FLATSCR11-NEXT: v_cmpx_ne_u32_e32 0, v0 ; FLATSCR11-NEXT: s_cbranch_execz .LBB0_2 ; FLATSCR11-NEXT: ; %bb.1: ; %if.then4.i -; FLATSCR11-NEXT: s_movk_i32 vcc_lo, 0x4000 +; FLATSCR11-NEXT: s_movk_i32 s1, 0x4000 ; FLATSCR11-NEXT: s_mov_b32 s0, 0x41c64e6d -; FLATSCR11-NEXT: scratch_load_b64 v[0:1], off, vcc_lo offset:4 +; FLATSCR11-NEXT: scratch_load_b64 v[0:1], off, s1 offset:4 ; FLATSCR11-NEXT: s_waitcnt vmcnt(0) ; FLATSCR11-NEXT: v_add_nc_u32_e32 v2, v1, v0 ; FLATSCR11-NEXT: v_mad_u64_u32 v[0:1], null, v2, s0, 0x3039 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir @@ -1085,10 +1085,10 @@ ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc - ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc - ; GFX9-FLATSCR-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi - ; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc + ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc + ; GFX9-FLATSCR-NEXT: $sgpr4 = S_BITSET0_B32 0, $sgpr4 + ; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec ; GFX9-FLATSCR-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, 
$sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GFX9-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX9-FLATSCR-NEXT: {{ $}} @@ -1104,10 +1104,10 @@ ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX10-FLATSCR-NEXT: $vcc_lo = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc - ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $vcc_lo, 0, implicit-def $scc - ; GFX10-FLATSCR-NEXT: $vcc_lo = S_BITSET0_B32 0, $vcc_lo - ; GFX10-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec + ; GFX10-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc + ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc + ; GFX10-FLATSCR-NEXT: $sgpr4 = S_BITSET0_B32 0, $sgpr4 + ; GFX10-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec ; GFX10-FLATSCR-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GFX10-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX10-FLATSCR-NEXT: {{ $}} @@ -1191,10 +1191,10 @@ ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) - ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc - ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc - ; GFX9-FLATSCR-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi - ; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc + ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc + ; GFX9-FLATSCR-NEXT: $sgpr4 = S_BITSET0_B32 0, $sgpr4 + ; 
GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec ; GFX9-FLATSCR-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc @@ -1214,10 +1214,10 @@ ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) - ; GFX10-FLATSCR-NEXT: $vcc_lo = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc - ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $vcc_lo, 0, implicit-def $scc - ; GFX10-FLATSCR-NEXT: $vcc_lo = S_BITSET0_B32 0, $vcc_lo - ; GFX10-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec + ; GFX10-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc + ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc + ; GFX10-FLATSCR-NEXT: $sgpr4 = S_BITSET0_B32 0, $sgpr4 + ; GFX10-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec ; GFX10-FLATSCR-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc @@ -1303,10 +1303,10 @@ ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc - ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $vcc_hi, 0, 
implicit-def $scc - ; GFX9-FLATSCR-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi - ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc + ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc + ; GFX9-FLATSCR-NEXT: $sgpr4 = S_BITSET0_B32 0, $sgpr4 + ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec ; GFX9-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: @@ -1323,10 +1323,10 @@ ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX10-FLATSCR-NEXT: $vcc_lo = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc - ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $vcc_lo, 0, implicit-def $scc - ; GFX10-FLATSCR-NEXT: $vcc_lo = S_BITSET0_B32 0, $vcc_lo - ; GFX10-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec + ; GFX10-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc + ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc + ; GFX10-FLATSCR-NEXT: $sgpr4 = S_BITSET0_B32 0, $sgpr4 + ; GFX10-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec ; GFX10-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: