diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -170,7 +170,8 @@ // a register as actually in use in another lane, so we need to save all // used lanes of the chosen VGPR. assert(RS && "Cannot spill SGPR to memory without RegScavenger"); - TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false); + TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, + 0, false); // Reserve temporary stack slot TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI); @@ -199,7 +200,7 @@ const TargetRegisterClass &RC = IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass; RS->setRegUsed(SuperReg); - SavedExecReg = RS->scavengeRegister(&RC, MI, 0, false); + SavedExecReg = RS->scavengeRegisterBackwards(RC, MI, false, 0, false); int64_t VGPRLanes = getPerVGPRData().VGPRLanes; @@ -1591,7 +1592,8 @@ } else if (UseVGPROffset) { // FIXME: change to scavengeRegisterBackwards() if (!TmpOffsetVGPR) { - TmpOffsetVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); + TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, + MI, false, 0); RS->setRegUsed(TmpOffsetVGPR); } } @@ -2282,7 +2284,8 @@ const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass; - Register TmpReg = RS->scavengeRegister(RC, MI, 0, !UseSGPR); + Register TmpReg = + RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR); FIOp.setReg(TmpReg); FIOp.setIsKill(); @@ -2302,8 +2305,8 @@ Register TmpSReg = UseSGPR ? TmpReg - : RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, - !UseSGPR); + : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, + MI, false, 0, !UseSGPR); // TODO: for flat scratch another attempt can be made with a VGPR index // if no SGPRs can be scavenged. @@ -2377,8 +2380,9 @@ : &AMDGPU::VGPR_32RegClass; bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 || MI->getOpcode() == AMDGPU::V_MOV_B32_e64; - Register ResultReg = IsCopy ? MI->getOperand(0).getReg() - : RS->scavengeRegister(RC, MI, 0); + Register ResultReg = + IsCopy ? MI->getOperand(0).getReg() + : RS->scavengeRegisterBackwards(*RC, MI, false, 0); int64_t Offset = FrameInfo.getObjectOffset(Index); if (Offset == 0) { @@ -2391,8 +2395,8 @@ if (IsSALU && !LiveSCC) Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead. if (IsSALU && LiveSCC) { - Register NewDest = - RS->scavengeRegister(&AMDGPU::SReg_32RegClass, Shift, 0); + Register NewDest = RS->scavengeRegisterBackwards( + AMDGPU::SReg_32RegClass, Shift, false, 0); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), NewDest) .addReg(ResultReg); @@ -2446,8 +2450,8 @@ // We may have 1 free scratch SGPR even though a carry out is // unavailable. Only one additional mov is needed. - Register TmpScaledReg = - RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false); + Register TmpScaledReg = RS->scavengeRegisterBackwards( + AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false); Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg; BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg) @@ -2512,7 +2516,8 @@ FIOp.ChangeToImmediate(Offset); if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) { - Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); + Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, + MI, false, 0); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) .addImm(Offset); FIOp.ChangeToRegister(TmpReg, false, false, true); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll @@ -159,50 +159,32 @@ ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 ; FLATSCR-NEXT: v_mov_b32_e32 v1, 0 -; FLATSCR-NEXT: s_mov_b32 s8, 0 -; FLATSCR-NEXT: s_mov_b32 s12, 0 -; FLATSCR-NEXT: s_mov_b32 s11, 0 -; FLATSCR-NEXT: s_mov_b32 s10, 0 -; FLATSCR-NEXT: s_mov_b32 s9, 0 -; FLATSCR-NEXT: s_mov_b32 s13, 0 -; FLATSCR-NEXT: s_mov_b32 s7, 0 -; FLATSCR-NEXT: s_mov_b32 s5, 0 -; FLATSCR-NEXT: s_mov_b32 s3, 0 -; FLATSCR-NEXT: s_mov_b32 s1, 0 ; FLATSCR-NEXT: s_mov_b32 s0, 0 -; FLATSCR-NEXT: s_mov_b32 s2, 0 -; FLATSCR-NEXT: s_mov_b32 s4, 0 -; FLATSCR-NEXT: s_mov_b32 s6, 0 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s8 offset:8 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s12 offset:16 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s11 offset:24 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s10 offset:32 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s9 offset:40 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s13 offset:48 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s7 offset:56 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s5 offset:64 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s3 offset:72 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s1 offset:80 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:8 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:16 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:24 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:32 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:40 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:48 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:56 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:64 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:72 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:80 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:88 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s2 offset:96 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s4 offset:104 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s6 offset:112 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s8 offset:120 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s12 offset:128 -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s11 offset:8 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:96 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:104 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:112 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:120 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:128 +; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 offset:8 ; FLATSCR-NEXT: s_nop 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s10 offset:16 -; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s9 offset:24 -; FLATSCR-NEXT: s_mov_b32 s37, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[6:7], off, s37 offset:32 -; FLATSCR-NEXT: s_mov_b32 s36, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s36 offset:40 -; FLATSCR-NEXT: s_mov_b32 s35, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s35 offset:48 -; FLATSCR-NEXT: s_mov_b32 s34, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[12:13], off, s34 offset:56 -; FLATSCR-NEXT: s_mov_b32 s33, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[14:15], off, s33 offset:64 +; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:16 +; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s0 offset:24 +; FLATSCR-NEXT: scratch_load_dwordx2 v[6:7], off, s0 offset:32 +; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s0 offset:40 +; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s0 offset:48 +; FLATSCR-NEXT: scratch_load_dwordx2 v[12:13], off, s0 offset:56 +; FLATSCR-NEXT: scratch_load_dwordx2 v[14:15], off, s0 offset:64 ; FLATSCR-NEXT: s_movk_i32 s32, 0x50 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_byval@rel32@lo+4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -239,6 +239,8 @@ ; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill ; GCN-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:224 ; GCN-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:240 +; GCN-NEXT: v_lshrrev_b32_e64 v1, 6, s33 +; GCN-NEXT: v_add_u32_e32 v1, 0x100, v1 ; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:256 ; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:260 ; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:264 @@ -308,10 +310,8 @@ ; GCN-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload ; GCN-NEXT: v_bfe_u32 v0, v6, 1, 6 -; GCN-NEXT: v_lshrrev_b32_e64 v2, 6, s33 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GCN-NEXT: v_add_u32_e32 v2, 0x100, v2 -; GCN-NEXT: v_add_u32_e32 v0, v2, v0 +; GCN-NEXT: v_add_u32_e32 v0, v1, v0 ; GCN-NEXT: v_and_b32_e32 v1, 1, v6 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -279,8 +279,8 @@ ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-NEXT: s_mov_b32 s2, 0 -; GFX9-NEXT: scratch_load_dword v0, off, s2 offset:4 glc +; GFX9-NEXT: s_mov_b32 s1, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s1 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 @@ -459,9 +459,9 @@ ; GFX9-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX9-NEXT: s_add_i32 s1, s32, 0x100 +; GFX9-NEXT: s_add_i32 s0, s32, 0x100 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX9-NEXT: v_add_u32_e32 v1, s1, v1 +; GFX9-NEXT: v_add_u32_e32 v1, s0, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_add_i32 s0, s32, 0x100 @@ -478,13 +478,13 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-NEXT: s_add_i32 s1, s32, 0x100 ; GFX10-NEXT: s_add_i32 s0, s32, 0x100 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v0, s1, v0 ; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0 +; GFX10-NEXT: s_add_i32 s0, s32, 0x100 ; GFX10-NEXT: v_add_nc_u32_e32 v1, s0, v1 ; GFX10-NEXT: scratch_store_dword v0, v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -544,8 +544,8 @@ ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-NEXT: s_mov_b32 s2, 0 -; GFX9-NEXT: scratch_load_dword v0, off, s2 offset:4 glc +; GFX9-NEXT: s_mov_b32 s1, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s1 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 @@ -728,9 +728,9 @@ ; GFX9-NEXT: scratch_load_dword v1, off, s32 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX9-NEXT: s_add_i32 s1, s32, 0x4004 +; GFX9-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX9-NEXT: v_add_u32_e32 v1, s1, v1 +; GFX9-NEXT: v_add_u32_e32 v1, s0, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_add_i32 s0, s32, 0x4004 @@ -747,13 +747,13 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v0, s1, v0 ; GFX10-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0 +; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-NEXT: v_add_nc_u32_e32 v1, s0, v1 ; GFX10-NEXT: scratch_store_dword v0, v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -767,8 +767,8 @@ ; GFX940-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX940-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX940-NEXT: v_add_u32_e32 v1, s1, v1 +; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX940-NEXT: v_add_u32_e32 v1, s0, v1 ; GFX940-NEXT: v_mov_b32_e32 v2, 15 ; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 @@ -785,12 +785,11 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_lshlrev_b32 v1, 2, v0 ; GFX11-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_add_nc_u32_e32 v1, s1, v1 ; GFX11-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v1, s0, v1 +; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: scratch_store_b32 v1, v2, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -816,10 +815,10 @@ ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: s_movk_i32 s0, 0x3e80 -; GFX9-NEXT: scratch_store_dword off, v0, s1 offset:4 +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_movk_i32 s0, 0x3e80 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: scratch_store_dword off, v0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -481,25 +481,22 @@ ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; FLATSCR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; FLATSCR-NEXT: v_mov_b32_e32 v2, 0 -; FLATSCR-NEXT: s_mov_b32 s6, 0 -; FLATSCR-NEXT: s_mov_b32 s5, 0 ; FLATSCR-NEXT: s_mov_b32 s4, 0 ; FLATSCR-NEXT: s_waitcnt lgkmcnt(0) ; FLATSCR-NEXT: global_load_ushort v0, v2, s[0:1] ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_short off, v0, s6 offset:4 +; FLATSCR-NEXT: scratch_store_short off, v0, s4 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: global_load_ushort v0, v2, s[0:1] offset:2 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_short off, v0, s5 offset:6 +; FLATSCR-NEXT: scratch_store_short off, v0, s4 offset:6 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: global_load_ushort v0, v2, s[0:1] offset:4 -; FLATSCR-NEXT: s_mov_b32 s1, 0 ; FLATSCR-NEXT: s_mov_b32 s0, 0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_short off, v0, s4 offset:8 +; FLATSCR-NEXT: scratch_store_short off, v0, s0 offset:8 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_load_dword v0, off, s1 offset:4 +; FLATSCR-NEXT: scratch_load_dword v0, off, s0 offset:4 ; FLATSCR-NEXT: scratch_load_dword v1, off, s0 offset:6 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] @@ -544,27 +541,24 @@ ; FLATSCR_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; FLATSCR_GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v2, 0 -; FLATSCR_GFX10-NEXT: s_mov_b32 s6, 0 -; FLATSCR_GFX10-NEXT: s_mov_b32 s5, 0 ; FLATSCR_GFX10-NEXT: s_mov_b32 s4, 0 ; FLATSCR_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; FLATSCR_GFX10-NEXT: global_load_ushort v0, v2, s[0:1] ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) -; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s6 offset:4 +; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s4 offset:4 ; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: global_load_ushort v0, v2, s[0:1] offset:2 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) -; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s5 offset:6 +; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s4 offset:6 ; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: global_load_ushort v0, v2, s[0:1] offset:4 ; FLATSCR_GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; FLATSCR_GFX10-NEXT: s_mov_b32 s1, 0 ; FLATSCR_GFX10-NEXT: s_mov_b32 s0, 0 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) -; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s4 offset:8 +; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s0 offset:8 ; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: s_clause 0x1 -; FLATSCR_GFX10-NEXT: scratch_load_dword v0, off, s1 offset:4 +; FLATSCR_GFX10-NEXT: scratch_load_dword v0, off, s0 offset:4 ; FLATSCR_GFX10-NEXT: scratch_load_dword v1, off, s0 offset:6 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) ; FLATSCR_GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/extract-load-i1.ll b/llvm/test/CodeGen/AMDGPU/extract-load-i1.ll --- a/llvm/test/CodeGen/AMDGPU/extract-load-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-load-i1.ll @@ -9,8 +9,8 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: flat_load_ubyte v0, v[0:1] ; CHECK-NEXT: v_and_b32_e32 v1, 7, v2 -; CHECK-NEXT: v_lshr_b32_e64 v9, s32, 6 -; CHECK-NEXT: v_or_b32_e32 v1, v9, v1 +; CHECK-NEXT: v_lshr_b32_e64 v2, s32, 6 +; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_bfe_u32 v2, v0, 1, 1 ; CHECK-NEXT: v_bfe_u32 v3, v0, 2, 2 diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll @@ -117,21 +117,21 @@ ; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; FLAT_SCR_OPT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s104, exec_lo +; FLAT_SCR_OPT-NEXT: s_mov_b32 s4, exec_lo ; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, 3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s4, 0 -; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v72, s4 +; FLAT_SCR_OPT-NEXT: s_mov_b32 s5, 0 +; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v0, s5 ; FLAT_SCR_OPT-NEXT: s_waitcnt lgkmcnt(0) -; FLAT_SCR_OPT-NEXT: v_writelane_b32 v72, s2, 0 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s4, 4 -; FLAT_SCR_OPT-NEXT: v_writelane_b32 v72, s3, 1 -; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v72, s4 ; 4-byte Folded Spill +; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s2, 0 +; FLAT_SCR_OPT-NEXT: s_mov_b32 s5, 4 +; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s3, 1 +; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v0, s5 ; 4-byte Folded Spill ; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s4, 0 -; FLAT_SCR_OPT-NEXT: scratch_load_dword v72, off, s4 +; FLAT_SCR_OPT-NEXT: s_mov_b32 s5, 0 +; FLAT_SCR_OPT-NEXT: scratch_load_dword v0, off, s5 ; FLAT_SCR_OPT-NEXT: s_waitcnt vmcnt(0) ; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s104 +; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s4 ; FLAT_SCR_OPT-NEXT: s_load_dword vcc_lo, s[0:1], 0x8 ; FLAT_SCR_OPT-NEXT: ; kill: killed $sgpr0_sgpr1 ; FLAT_SCR_OPT-NEXT: ;;#ASMSTART @@ -231,16 +231,16 @@ ; FLAT_SCR_OPT-NEXT: s_mov_b32 s2, exec_lo ; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, 3 ; FLAT_SCR_OPT-NEXT: s_mov_b32 s3, 0 -; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v2, s3 +; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v1, s3 ; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 ; FLAT_SCR_OPT-NEXT: s_mov_b32 s3, 4 -; FLAT_SCR_OPT-NEXT: scratch_load_dword v2, off, s3 ; 4-byte Folded Reload +; FLAT_SCR_OPT-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload ; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 ; FLAT_SCR_OPT-NEXT: s_mov_b32 s3, 0 ; FLAT_SCR_OPT-NEXT: s_waitcnt vmcnt(0) -; FLAT_SCR_OPT-NEXT: v_readlane_b32 s0, v2, 0 -; FLAT_SCR_OPT-NEXT: v_readlane_b32 s1, v2, 1 -; FLAT_SCR_OPT-NEXT: scratch_load_dword v2, off, s3 +; FLAT_SCR_OPT-NEXT: v_readlane_b32 s0, v1, 0 +; FLAT_SCR_OPT-NEXT: v_readlane_b32 s1, v1, 1 +; FLAT_SCR_OPT-NEXT: scratch_load_dword v1, off, s3 ; FLAT_SCR_OPT-NEXT: s_waitcnt vmcnt(0) ; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 ; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s2 @@ -251,21 +251,21 @@ ; FLAT_SCR_ARCH-LABEL: test: ; FLAT_SCR_ARCH: ; %bb.0: ; FLAT_SCR_ARCH-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s104, exec_lo +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s4, exec_lo ; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, 3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s4, 0 -; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v72, s4 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s5, 0 +; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v0, s5 ; FLAT_SCR_ARCH-NEXT: s_waitcnt lgkmcnt(0) -; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v72, s2, 0 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s4, 4 -; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v72, s3, 1 -; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v72, s4 ; 4-byte Folded Spill +; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s2, 0 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s5, 4 +; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s3, 1 +; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v0, s5 ; 4-byte Folded Spill ; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s4, 0 -; FLAT_SCR_ARCH-NEXT: scratch_load_dword v72, off, s4 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s5, 0 +; FLAT_SCR_ARCH-NEXT: scratch_load_dword v0, off, s5 ; FLAT_SCR_ARCH-NEXT: s_waitcnt vmcnt(0) ; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s104 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s4 ; FLAT_SCR_ARCH-NEXT: s_load_dword vcc_lo, s[0:1], 0x8 ; FLAT_SCR_ARCH-NEXT: ; kill: killed $sgpr0_sgpr1 ; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART @@ -365,16 +365,16 @@ ; FLAT_SCR_ARCH-NEXT: s_mov_b32 s2, exec_lo ; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, 3 ; FLAT_SCR_ARCH-NEXT: s_mov_b32 s3, 0 -; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v2, s3 +; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v1, s3 ; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 ; FLAT_SCR_ARCH-NEXT: s_mov_b32 s3, 4 -; FLAT_SCR_ARCH-NEXT: scratch_load_dword v2, off, s3 ; 4-byte Folded Reload +; FLAT_SCR_ARCH-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload ; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 ; FLAT_SCR_ARCH-NEXT: s_mov_b32 s3, 0 ; FLAT_SCR_ARCH-NEXT: s_waitcnt vmcnt(0) -; FLAT_SCR_ARCH-NEXT: v_readlane_b32 s0, v2, 0 -; FLAT_SCR_ARCH-NEXT: v_readlane_b32 s1, v2, 1 -; FLAT_SCR_ARCH-NEXT: scratch_load_dword v2, off, s3 +; FLAT_SCR_ARCH-NEXT: v_readlane_b32 s0, v1, 0 +; FLAT_SCR_ARCH-NEXT: v_readlane_b32 s1, v1, 1 +; FLAT_SCR_ARCH-NEXT: scratch_load_dword v1, off, s3 ; FLAT_SCR_ARCH-NEXT: s_waitcnt vmcnt(0) ; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 ; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s2 diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -21,12 +21,9 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_mov_b32 s3, 0 -; GFX9-NEXT: s_mov_b32 s2, 0 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:52 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:36 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:20 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:52 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:36 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:20 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:4 ; GFX9-NEXT: s_endpgm ; @@ -84,12 +81,9 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:52 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:36 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:20 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:52 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:36 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:20 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:4 ; GFX9-PAL-NEXT: s_endpgm ; @@ -126,12 +120,9 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX1010-PAL-NEXT: s_mov_b32 s3, 0 -; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 -; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:52 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:36 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:20 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:52 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:36 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:20 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:4 ; GFX1010-PAL-NEXT: s_endpgm ; @@ -892,10 +883,9 @@ ; GFX9: ; %bb.0: ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: scratch_load_dword v0, off, s4 offset:4 glc -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s0 offset:4 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: s_mov_b32 s2, s0 ; GFX9-NEXT: s_mov_b32 s3, s0 @@ -903,12 +893,9 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_mov_b32 s3, 0 -; GFX9-NEXT: s_mov_b32 s2, 0 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:260 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:276 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:292 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:260 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:276 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:292 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:308 ; GFX9-NEXT: s_endpgm ; @@ -958,13 +945,12 @@ ; GFX9-PAL-NEXT: s_getpc_b64 s[2:3] ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX9-PAL-NEXT: s_mov_b32 s4, 0 ; GFX9-PAL-NEXT: s_mov_b32 s0, 0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: scratch_load_dword v0, off, s4 offset:4 glc +; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_mov_b32 s1, s0 ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 @@ -973,12 +959,9 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:260 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:276 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:292 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:260 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:276 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:292 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:308 ; GFX9-PAL-NEXT: s_endpgm ; @@ -1009,9 +992,8 @@ ; GFX1010-PAL-NEXT: s_addc_u32 s3, s3, 0 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX1010-PAL-NEXT: s_mov_b32 s4, 0 ; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s4 offset:4 glc dlc +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s0 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: s_mov_b32 s1, s0 ; GFX1010-PAL-NEXT: s_mov_b32 s2, s0 @@ -1020,12 +1002,9 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX1010-PAL-NEXT: s_mov_b32 s3, 0 -; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 -; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:260 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:276 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:292 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:260 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:276 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:292 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:308 ; GFX1010-PAL-NEXT: s_endpgm ; @@ -1238,8 +1217,8 @@ ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-NEXT: s_mov_b32 s2, 0 -; GFX9-NEXT: scratch_load_dword v0, off, s2 offset:4 glc +; GFX9-NEXT: s_mov_b32 s1, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s1 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 @@ -1298,14 +1277,14 @@ ; GFX9-PAL-NEXT: s_getpc_b64 s[4:5] ; GFX9-PAL-NEXT: s_mov_b32 s4, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff +; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s5, 0 -; GFX9-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc -; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX9-PAL-NEXT: s_mov_b32 s1, 0 +; GFX9-PAL-NEXT: scratch_load_dword v0, off, s1 offset:4 glc +; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 @@ -1348,8 +1327,8 @@ ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 ; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc dlc +; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX1010-PAL-NEXT: s_waitcnt lgkmcnt(0) @@ -1426,8 +1405,8 @@ ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: scratch_load_dword v0, off, s1 offset:4 glc +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s0 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_lshl_b32 s0, s2, 2 ; GFX9-NEXT: s_addk_i32 s0, 0x104 @@ -1486,8 +1465,8 @@ ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc +; GFX9-PAL-NEXT: s_mov_b32 s1, 0 +; GFX9-PAL-NEXT: scratch_load_dword v0, off, s1 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15 @@ -1528,11 +1507,11 @@ ; GFX1010-PAL-NEXT: s_addc_u32 s3, s3, 0 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 -; GFX1010-PAL-NEXT: s_and_b32 s1, s0, 15 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc dlc +; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15 +; GFX1010-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2 ; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x104 @@ -1785,13 +1764,13 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-NEXT: s_add_i32 s1, s32, 0x100 ; GFX10-NEXT: s_add_i32 s0, s32, 0x100 -; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, s0 +; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s0 +; GFX10-NEXT: s_add_i32 s0, s32, 0x100 ; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, s0 ; GFX10-NEXT: scratch_store_dword v0, v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc @@ -1853,13 +1832,13 @@ ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-PAL-NEXT: s_add_i32 s1, s32, 0x100 ; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x100 -; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, s0 +; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 +; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x100 ; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, s0 ; GFX10-PAL-NEXT: scratch_store_dword v0, v2, off ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc @@ -1899,10 +1878,9 @@ ; GFX9: ; %bb.0: ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: scratch_load_dword v0, off, s4 offset:4 glc -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s0 offset:4 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: s_mov_b32 s2, s0 ; GFX9-NEXT: s_mov_b32 s3, s0 @@ -1910,13 +1888,10 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_movk_i32 s3, 0x4004 -; GFX9-NEXT: s_movk_i32 s2, 0x4004 -; GFX9-NEXT: s_movk_i32 s1, 0x4004 ; GFX9-NEXT: s_movk_i32 s0, 0x4004 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s3 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX9-NEXT: s_endpgm ; @@ -1936,13 +1911,10 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-NEXT: s_movk_i32 s3, 0x4004 -; GFX10-NEXT: s_movk_i32 s2, 0x4004 -; GFX10-NEXT: s_movk_i32 s1, 0x4004 ; GFX10-NEXT: s_movk_i32 s0, 0x4004 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s3 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX10-NEXT: s_endpgm ; @@ -1957,14 +1929,11 @@ ; GFX11-NEXT: s_mov_b32 s3, s0 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-NEXT: s_movk_i32 s3, 0x4004 -; GFX11-NEXT: s_movk_i32 s2, 0x4004 -; GFX11-NEXT: s_movk_i32 s1, 0x4004 ; GFX11-NEXT: s_movk_i32 s0, 0x4004 ; GFX11-NEXT: s_clause 0x3 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s3 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 offset:32 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:16 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm @@ -1974,13 +1943,12 @@ ; GFX9-PAL-NEXT: s_getpc_b64 s[2:3] ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX9-PAL-NEXT: s_mov_b32 s4, 0 ; GFX9-PAL-NEXT: s_mov_b32 s0, 0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: scratch_load_dword v0, off, s4 offset:4 glc +; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_mov_b32 s1, s0 ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 @@ -1989,13 +1957,10 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-PAL-NEXT: s_movk_i32 s3, 0x4004 -; GFX9-PAL-NEXT: s_movk_i32 s2, 0x4004 -; GFX9-PAL-NEXT: s_movk_i32 s1, 0x4004 ; GFX9-PAL-NEXT: s_movk_i32 s0, 0x4004 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX9-PAL-NEXT: s_endpgm ; @@ -2009,13 +1974,10 @@ ; GFX940-NEXT: s_mov_b32 s3, s0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NEXT: s_movk_i32 s3, 0x4004 -; GFX940-NEXT: s_movk_i32 s2, 0x4004 -; GFX940-NEXT: s_movk_i32 s1, 0x4004 ; GFX940-NEXT: s_movk_i32 s0, 0x4004 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s3 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 sc0 sc1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 sc0 sc1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 sc0 sc1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 sc0 sc1 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 sc0 sc1 ; GFX940-NEXT: s_endpgm ; @@ -2030,9 +1992,8 @@ ; GFX1010-PAL-NEXT: s_addc_u32 s3, s3, 0 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX1010-PAL-NEXT: s_mov_b32 s4, 0 ; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s4 offset:4 glc dlc +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s0 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: s_mov_b32 s1, s0 ; GFX1010-PAL-NEXT: s_mov_b32 s2, s0 @@ -2041,13 +2002,10 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX1010-PAL-NEXT: s_movk_i32 s3, 0x4004 -; GFX1010-PAL-NEXT: s_movk_i32 s2, 0x4004 -; GFX1010-PAL-NEXT: s_movk_i32 s1, 0x4004 ; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x4004 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX1010-PAL-NEXT: s_endpgm ; @@ -2072,13 +2030,10 @@ ; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX1030-PAL-NEXT: s_movk_i32 s3, 0x4004 -; GFX1030-PAL-NEXT: s_movk_i32 s2, 0x4004 -; GFX1030-PAL-NEXT: s_movk_i32 s1, 0x4004 ; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x4004 -; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 -; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 -; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX1030-PAL-NEXT: s_endpgm ; @@ -2093,14 +2048,11 @@ ; GFX11-PAL-NEXT: s_mov_b32 s3, s0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-PAL-NEXT: s_movk_i32 s3, 0x4004 -; GFX11-PAL-NEXT: s_movk_i32 s2, 0x4004 -; GFX11-PAL-NEXT: s_movk_i32 s1, 0x4004 ; GFX11-PAL-NEXT: s_movk_i32 s0, 0x4004 ; GFX11-PAL-NEXT: s_clause 0x3 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s3 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s2 offset:16 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s1 offset:32 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:16 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48 ; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-PAL-NEXT: s_endpgm @@ -2126,13 +2078,13 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_add_i32 s3, s32, 0x4004 -; GFX9-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX9-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX9-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s3 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 +; GFX9-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX9-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 +; GFX9-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -2151,13 +2103,13 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-NEXT: s_add_i32 s3, s32, 0x4004 -; GFX10-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX10-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s3 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 +; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 +; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -2175,14 +2127,13 @@ ; GFX11-NEXT: s_mov_b32 s3, s0 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-NEXT: s_add_i32 s3, s32, 0x4004 -; GFX11-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX11-NEXT: s_clause 0x3 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s3 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 offset:32 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 +; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:16 +; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 +; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -2200,13 +2151,13 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-PAL-NEXT: s_add_i32 s3, s32, 0x4004 -; GFX9-PAL-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX9-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 +; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 +; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] @@ -2222,41 +2173,69 @@ ; GFX940-NEXT: s_mov_b32 s3, s0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NEXT: s_add_i32 s3, s32, 0x4004 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX940-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s3 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 sc0 sc1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 sc0 sc1 +; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 sc0 sc1 +; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 sc0 sc1 +; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-PAL-LABEL: zero_init_large_offset_foo: -; GFX10-PAL: ; %bb.0: -; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-PAL-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc -; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: s_mov_b32 s0, 0 -; GFX10-PAL-NEXT: s_mov_b32 s1, s0 -; GFX10-PAL-NEXT: s_mov_b32 s2, s0 -; GFX10-PAL-NEXT: s_mov_b32 s3, s0 -; GFX10-PAL-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-PAL-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-PAL-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-PAL-NEXT: s_add_i32 s3, s32, 0x4004 -; GFX10-PAL-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX10-PAL-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] +; GFX1010-PAL-LABEL: zero_init_large_offset_foo: +; GFX1010-PAL: ; %bb.0: +; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc +; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 +; GFX1010-PAL-NEXT: s_mov_b32 s1, s0 +; GFX1010-PAL-NEXT: s_mov_b32 s2, s0 +; GFX1010-PAL-NEXT: s_mov_b32 s3, s0 +; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, s0 +; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, s2 +; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX1010-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 +; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1010-PAL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-PAL-LABEL: zero_init_large_offset_foo: +; GFX1030-PAL: ; %bb.0: +; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc +; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX1030-PAL-NEXT: s_mov_b32 s0, 0 +; GFX1030-PAL-NEXT: s_mov_b32 s1, s0 +; GFX1030-PAL-NEXT: s_mov_b32 s2, s0 +; GFX1030-PAL-NEXT: s_mov_b32 s3, s0 +; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, s0 +; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1030-PAL-NEXT: v_mov_b32_e32 v2, s2 +; GFX1030-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX1030-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 +; GFX1030-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX1030-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 +; GFX1030-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 +; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1030-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-PAL-LABEL: zero_init_large_offset_foo: ; GFX11-PAL: ; %bb.0: @@ -2271,14 +2250,13 @@ ; GFX11-PAL-NEXT: s_mov_b32 s3, s0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-PAL-NEXT: s_add_i32 s3, s32, 0x4004 -; GFX11-PAL-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX11-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX11-PAL-NEXT: s_clause 0x3 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s3 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s2 offset:16 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s1 offset:32 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 +; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:16 +; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 +; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48 ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] @@ -2296,8 +2274,8 @@ ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-NEXT: s_mov_b32 s2, 0 -; GFX9-NEXT: scratch_load_dword v0, off, s2 offset:4 glc +; GFX9-NEXT: s_mov_b32 s1, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s1 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 @@ -2356,14 +2334,14 @@ ; GFX9-PAL-NEXT: s_getpc_b64 s[4:5] ; GFX9-PAL-NEXT: s_mov_b32 s4, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff +; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s5, 0 -; GFX9-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc -; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX9-PAL-NEXT: s_mov_b32 s1, 0 +; GFX9-PAL-NEXT: scratch_load_dword v0, off, s1 offset:4 glc +; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 @@ -2406,8 +2384,8 @@ ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 ; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc dlc +; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX1010-PAL-NEXT: s_waitcnt lgkmcnt(0) @@ -2484,8 +2462,8 @@ ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: scratch_load_dword v0, off, s1 offset:4 glc +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: scratch_load_dword v0, off, s0 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_lshl_b32 s0, s2, 2 ; GFX9-NEXT: s_addk_i32 s0, 0x4004 @@ -2544,8 +2522,8 @@ ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc +; GFX9-PAL-NEXT: s_mov_b32 s1, 0 +; GFX9-PAL-NEXT: scratch_load_dword v0, off, s1 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15 @@ -2586,11 +2564,11 @@ ; GFX1010-PAL-NEXT: s_addc_u32 s3, s3, 0 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 -; GFX1010-PAL-NEXT: s_and_b32 s1, s0, 15 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s2 offset:4 glc dlc +; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15 +; GFX1010-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2 ; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x4004 @@ -2846,13 +2824,13 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, s0 +; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s0 +; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, s0 ; GFX10-NEXT: scratch_store_dword v0, v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc @@ -2864,13 +2842,12 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX11-NEXT: v_lshl_add_u32 v0, v0, 2, s1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX11-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_lshl_add_u32 v0, v0, 2, s0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-NEXT: scratch_store_b32 v0, v2, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, v1, s0 glc dlc @@ -2899,8 +2876,8 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX940-NEXT: v_mov_b32_e32 v1, s1 +; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX940-NEXT: v_mov_b32_e32 v1, s0 ; GFX940-NEXT: v_lshl_add_u32 v1, v0, 2, v1 ; GFX940-NEXT: v_mov_b32_e32 v2, 15 ; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 @@ -2917,13 +2894,13 @@ ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, s0 +; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 +; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, s0 ; GFX10-PAL-NEXT: scratch_store_dword v0, v2, off ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc @@ -2935,13 +2912,12 @@ ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 -; GFX11-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX11-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s1 -; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX11-PAL-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX11-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 +; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-PAL-NEXT: scratch_store_b32 v0, v2, off dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, s0 glc dlc @@ -2966,10 +2942,10 @@ ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: s_movk_i32 s0, 0x3000 -; GFX9-NEXT: scratch_store_dword off, v0, s1 offset:4 +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_movk_i32 s0, 0x3000 ; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712 @@ -3016,14 +2992,14 @@ ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13 -; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000 +; GFX9-PAL-NEXT: s_mov_b32 s0, 0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1 offset:4 +; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:4 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000 ; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712 @@ -3919,10 +3895,9 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:3024 -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:3024 +; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: scratch_load_dwordx4 v[0:3], off, s0 offset:3024 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 16 @@ -3994,10 +3969,9 @@ ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:3024 -; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_mov_b32 s0, 0 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:3024 +; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: scratch_load_dwordx4 v[0:3], off, s0 offset:3024 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 16 diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir --- a/llvm/test/CodeGen/AMDGPU/frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir @@ -89,8 +89,8 @@ ; GCN-LABEL: name: func_add_constant_to_fi_uniform_SCC_clobber_i32 ; GCN: liveins: $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr1 = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc - ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $sgpr1, 4, implicit-def $scc + ; GCN-NEXT: $sgpr0 = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc + ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $sgpr0, 4, implicit-def $scc ; GCN-NEXT: renamable $sgpr5 = S_ADDC_U32 $sgpr4, 1234567, implicit-def $scc, implicit $scc ; GCN-NEXT: $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; GCN-NEXT: $sgpr0 = S_ADD_I32 killed $sgpr0, 8, implicit-def $scc @@ -167,8 +167,8 @@ ; GCN: liveins: $sgpr10, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 $sgpr10, 4, implicit-def $scc - ; GCN-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec - ; GCN-NEXT: $sgpr0 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec + ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; GCN-NEXT: $sgpr0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec ; GCN-NEXT: renamable $sgpr5 = S_MUL_I32 killed $sgpr0, $sgpr4 ; GCN-NEXT: renamable $sgpr6 = S_ADDC_U32 $sgpr5, 1234567, implicit-def dead $scc, implicit $scc ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr6, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll --- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll @@ -47,22 +47,22 @@ ; NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, s4 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x800 -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[10:11], exec +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v2, s30, 0 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s30, 0 +; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[10:11] -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[8:9], exec +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v1, s31, 0 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s31, 0 +; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9] +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] ; NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) @@ -70,24 +70,24 @@ ; NO-SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; NO-SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; NO-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[8:9], exec +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v2, 0 -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v0, 0 +; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9] -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[6:7], exec +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v1, 0 -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v0, 0 +; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7] +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xf800 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -61,10 +61,10 @@ ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 -; FLATSCR-NEXT: s_movk_i32 s3, 0x2000 -; FLATSCR-NEXT: s_mov_b32 s2, 0 -; FLATSCR-NEXT: scratch_store_dword off, v0, s3 +; FLATSCR-NEXT: s_movk_i32 s2, 0x2000 +; FLATSCR-NEXT: scratch_store_dword off, v0, s2 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) +; FLATSCR-NEXT: s_mov_b32 s2, 0 ; FLATSCR-NEXT: .LBB0_1: ; %loadstoreloop ; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 ; FLATSCR-NEXT: s_add_i32 s3, s2, 0x3000 @@ -155,10 +155,10 @@ ; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000 ; FLATSCR-NEXT: s_add_i32 s32, s32, 0x8000 ; FLATSCR-NEXT: v_mov_b32_e32 v2, 0 -; FLATSCR-NEXT: s_add_i32 s1, s33, 0x2000 -; FLATSCR-NEXT: s_mov_b32 s0, 0 -; FLATSCR-NEXT: scratch_store_dword off, v2, s1 +; FLATSCR-NEXT: s_add_i32 s0, s33, 0x2000 +; FLATSCR-NEXT: scratch_store_dword off, v2, s0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) +; FLATSCR-NEXT: s_mov_b32 s0, 0 ; FLATSCR-NEXT: .LBB1_1: ; %loadstoreloop ; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 ; FLATSCR-NEXT: s_add_i32 s3, s33, 0x3000 @@ -233,35 +233,35 @@ ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: v_or_b32_e32 v1, 0x12cc, v0 ; MUBUF-NEXT: v_or_b32_e32 v0, 0x12c8, v0 -; MUBUF-NEXT: v_mov_b32_e32 v18, 0x4000 -; MUBUF-NEXT: v_mov_b32_e32 v17, 0x4000 -; MUBUF-NEXT: v_mov_b32_e32 v16, 0x4000 +; MUBUF-NEXT: v_mov_b32_e32 v2, 0x4000 ; MUBUF-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_mov_b32_e32 v15, 0x4000 +; MUBUF-NEXT: v_mov_b32_e32 v3, 0x4000 ; MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_mov_b32_e32 v14, 0x4000 -; MUBUF-NEXT: buffer_load_dword v8, v18, s[0:3], 0 offen glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v9, v17, s[0:3], 0 offen offset:4 glc +; MUBUF-NEXT: v_mov_b32_e32 v10, 0x4000 +; MUBUF-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v2, v16, s[0:3], 0 offen offset:8 glc +; MUBUF-NEXT: v_mov_b32_e32 v2, 0x4000 +; MUBUF-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:4 glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v3, v15, s[0:3], 0 offen offset:12 glc +; MUBUF-NEXT: v_mov_b32_e32 v11, 0x4000 +; MUBUF-NEXT: buffer_load_dword v2, v3, s[0:3], 0 offen offset:8 glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v10, v14, s[0:3], 0 offen offset:16 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_mov_b32_e32 v13, 0x4000 -; MUBUF-NEXT: buffer_load_dword v11, v13, s[0:3], 0 offen offset:20 glc +; MUBUF-NEXT: v_mov_b32_e32 v12, 0x4000 +; MUBUF-NEXT: buffer_load_dword v3, v10, s[0:3], 0 offen offset:12 glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; MUBUF-NEXT: v_mov_b32_e32 v12, 0 +; MUBUF-NEXT: buffer_load_dword v10, v11, s[0:3], 0 offen offset:16 glc +; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2 +; MUBUF-NEXT: buffer_load_dword v11, v12, s[0:3], 0 offen offset:20 glc +; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v3, vcc ; MUBUF-NEXT: v_add_co_u32_e32 v0, vcc, v7, v8 ; MUBUF-NEXT: v_addc_co_u32_e32 v1, vcc, v6, v9, vcc ; MUBUF-NEXT: v_add_co_u32_e32 v4, vcc, v4, v10 +; MUBUF-NEXT: v_mov_b32_e32 v12, 0 ; MUBUF-NEXT: v_addc_co_u32_e32 v5, vcc, v5, v11, vcc ; MUBUF-NEXT: s_waitcnt lgkmcnt(0) ; MUBUF-NEXT: global_store_dwordx2 v12, v[4:5], s[4:5] offset:16 @@ -275,9 +275,8 @@ ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 -; FLATSCR-NEXT: s_mov_b32 s3, 0 ; FLATSCR-NEXT: s_mov_b32 s2, 0 -; FLATSCR-NEXT: scratch_store_dword off, v0, s3 offset:1024 +; FLATSCR-NEXT: scratch_store_dword off, v0, s2 offset:1024 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: .LBB2_1: ; %loadstoreloop ; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -294,9 +293,8 @@ ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 offset:704 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_movk_i32 s3, 0x2000 ; FLATSCR-NEXT: s_movk_i32 s2, 0x2000 -; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s3 offset:16 glc +; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s2 offset:16 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_load_dwordx4 v[4:7], off, s2 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir @@ -87,10 +87,10 @@ ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; GFX9-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2 ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc - ; GFX9-FLATSCR-NEXT: $sgpr9 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc - ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr9, implicit $exec - ; GFX9-FLATSCR-NEXT: $sgpr8 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc - ; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr8, $vgpr1, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc + ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc + ; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr4, $vgpr1, implicit $exec ; GFX9-FLATSCR-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0 ; GFX9-FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll --- a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll +++ b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll @@ -34,7 +34,7 @@ ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 @@ -102,7 +102,7 @@ ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 @@ -155,7 +155,7 @@ ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x10 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 @@ -237,7 +237,7 @@ ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 @@ -293,7 +293,7 @@ ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 @@ -349,7 +349,7 @@ ; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s1, 0 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0 ; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) ; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff ; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir @@ -77,7 +77,7 @@ ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; VMEM-GFX8-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0 + ; VMEM-GFX8-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0, implicit $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 1, $vgpr0, implicit $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) @@ -333,7 +333,7 @@ ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 3 ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; VMEM-GFX8-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0 + ; VMEM-GFX8-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0, implicit $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 1, $vgpr0, implicit $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -444,7 +444,7 @@ ; VMEM-GFX8-NEXT: liveins: $sgpr8, $sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; VMEM-GFX8-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec + ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0 @@ -452,7 +452,7 @@ ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; VMEM-GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 @@ -508,7 +508,7 @@ ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; VMEM-GFX8-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec + ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) @@ -516,7 +516,7 @@ ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; VMEM-GFX8-NEXT: $sgpr8 = V_READLANE_B32 killed $vgpr0, 0 ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; VMEM-GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir @@ -26,13 +26,13 @@ ; CHECK: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr2 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr2 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr2 + ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec + ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr1 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir @@ -66,13 +66,13 @@ ; GCN64-MUBUF-NEXT: $sgpr28 = S_ADD_U32 $sgpr28, $sgpr11, implicit-def $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF-NEXT: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 @@ -82,33 +82,33 @@ ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13 - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr3 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13 - ; GCN64-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr3 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0 @@ -121,32 +121,32 @@ ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr2 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr14, 2, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr15, 3, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr16, 4, $vgpr2, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr15, 3, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr16, 4, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr17, 5, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr18, 6, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr19, 7, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0 @@ -171,44 +171,44 @@ ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr65, 1, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr66, 2, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr67, 3, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr68, 4, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr69, 5, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr70, 6, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr71, 7, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr72, 8, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr73, 9, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr74, 10, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr75, 11, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr76, 12, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr77, 13, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr78, 14, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr79, 15, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr80, 16, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr81, 17, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr82, 18, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr83, 19, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr84, 20, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr85, 21, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr86, 22, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr87, 23, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr88, 24, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr89, 25, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr90, 26, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr91, 27, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr92, 28, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr93, 29, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr94, 30, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr95, 31, $vgpr1, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 @@ -229,13 +229,13 @@ ; GCN32-MUBUF-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr3 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr3, implicit killed $vgpr1 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 @@ -245,33 +245,33 @@ ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr2 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13 - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr2 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr3 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13 - ; GCN32-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr3 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr3 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr3, implicit killed $vgpr1 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0 @@ -284,32 +284,32 @@ ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr2 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr14, 2, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr15, 3, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr16, 4, $vgpr2, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr2 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr15, 3, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr16, 4, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr17, 5, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr18, 6, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr19, 7, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr1 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0 @@ -334,44 +334,44 @@ ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr65, 1, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr66, 2, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr67, 3, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr68, 4, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr69, 5, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr70, 6, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr71, 7, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr72, 8, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr73, 9, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr74, 10, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr75, 11, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr76, 12, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr77, 13, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr78, 14, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr79, 15, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr80, 16, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr81, 17, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr82, 18, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr83, 19, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr84, 20, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr85, 21, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr86, 22, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr87, 23, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr88, 24, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr89, 25, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr90, 26, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr91, 27, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr92, 28, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr93, 29, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr94, 30, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr95, 31, $vgpr1, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 @@ -388,13 +388,13 @@ ; GCN64-FLATSCR-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 @@ -404,33 +404,33 @@ ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13 - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr3 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13 - ; GCN64-FLATSCR-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr3 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0 @@ -443,32 +443,32 @@ ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr2 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr14, 2, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr15, 3, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr16, 4, $vgpr2, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr15, 3, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr16, 4, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr17, 5, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr18, 6, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr19, 7, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0 @@ -493,44 +493,44 @@ ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr65, 1, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr66, 2, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr67, 3, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr68, 4, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr69, 5, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr70, 6, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr71, 7, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr72, 8, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr73, 9, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr74, 10, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr75, 11, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr76, 12, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr77, 13, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr78, 14, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr79, 15, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr80, 16, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr81, 17, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr82, 18, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr83, 19, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr84, 20, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr85, 21, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr86, 22, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr87, 23, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr88, 24, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr89, 25, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr90, 26, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr91, 27, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr92, 28, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr93, 29, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr94, 30, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr95, 31, $vgpr1, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 @@ -622,30 +622,30 @@ ; GCN64-MUBUF-NEXT: $sgpr31 = S_MOV_B32 14680064, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF-NEXT: $sgpr28 = S_ADD_U32 $sgpr28, $sgpr11, implicit-def $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF-NEXT: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 - ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr1, 0 - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 - ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13 - ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr2, 1 - ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2 - ; GCN64-MUBUF-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr3 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr3, 0, implicit-def $sgpr12_sgpr13_sgpr14 - ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr3, 1 - ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr3, 2 - ; GCN64-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr3 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13 + ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1 + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2 + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0 ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -656,31 +656,31 @@ ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3 ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 - ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) - ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr1, 1 - ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr1, 2 - ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr1, 3 - ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr1, 4 - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 - ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr2 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) - ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr2, 1 - ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr2, 2 - ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr2, 3 - ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr2, 4 - ; GCN64-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr2, 5 - ; GCN64-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr2, 6 - ; GCN64-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr2, 7 - ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) + ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 + ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3 + ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4 + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) + ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 + ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3 + ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4 + ; GCN64-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5 + ; GCN64-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6 + ; GCN64-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7 + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0 ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -703,44 +703,44 @@ ; GCN64-MUBUF-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15 ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 - ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5) - ; GCN64-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr1, 1 - ; GCN64-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr1, 2 - ; GCN64-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr1, 3 - ; GCN64-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr1, 4 - ; GCN64-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr1, 5 - ; GCN64-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr1, 6 - ; GCN64-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr1, 7 - ; GCN64-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr1, 8 - ; GCN64-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr1, 9 - ; GCN64-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr1, 10 - ; GCN64-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr1, 11 - ; GCN64-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr1, 12 - ; GCN64-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr1, 13 - ; GCN64-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr1, 14 - ; GCN64-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr1, 15 - ; GCN64-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr1, 16 - ; GCN64-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr1, 17 - ; GCN64-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr1, 18 - ; GCN64-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr1, 19 - ; GCN64-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr1, 20 - ; GCN64-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr1, 21 - ; GCN64-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr1, 22 - ; GCN64-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr1, 23 - ; GCN64-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr1, 24 - ; GCN64-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr1, 25 - ; GCN64-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr1, 26 - ; GCN64-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr1, 27 - ; GCN64-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr1, 28 - ; GCN64-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr1, 29 - ; GCN64-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr1, 30 - ; GCN64-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr1, 31 - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5) + ; GCN64-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2 + ; GCN64-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3 + ; GCN64-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4 + ; GCN64-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5 + ; GCN64-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6 + ; GCN64-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7 + ; GCN64-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8 + ; GCN64-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9 + ; GCN64-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10 + ; GCN64-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11 + ; GCN64-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12 + ; GCN64-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13 + ; GCN64-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14 + ; GCN64-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15 + ; GCN64-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16 + ; GCN64-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17 + ; GCN64-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18 + ; GCN64-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19 + ; GCN64-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20 + ; GCN64-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21 + ; GCN64-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22 + ; GCN64-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23 + ; GCN64-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24 + ; GCN64-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25 + ; GCN64-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26 + ; GCN64-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27 + ; GCN64-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28 + ; GCN64-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29 + ; GCN64-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30 + ; GCN64-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31 + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -759,30 +759,30 @@ ; GCN32-MUBUF-NEXT: $sgpr99 = S_MOV_B32 834756608, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 - ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr1, 0 - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1 - ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr2 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13 - ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr2, 1 - ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr2 - ; GCN32-MUBUF-NEXT: $sgpr3 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr3 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr3, 0, implicit-def $sgpr12_sgpr13_sgpr14 - ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr3, 1 - ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr3, 2 - ; GCN32-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr3, implicit killed $vgpr3 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13 + ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1 + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2 + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0 ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -793,31 +793,31 @@ ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3 ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 - ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) - ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr1, 1 - ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr1, 2 - ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr1, 3 - ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr1, 4 - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1 - ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr2 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) - ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr2, 1 - ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr2, 2 - ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr2, 3 - ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr2, 4 - ; GCN32-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr2, 5 - ; GCN32-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr2, 6 - ; GCN32-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr2, 7 - ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr2 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) + ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 + ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3 + ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4 + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) + ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 + ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3 + ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4 + ; GCN32-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5 + ; GCN32-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6 + ; GCN32-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7 + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0 ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -840,44 +840,44 @@ ; GCN32-MUBUF-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15 ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 - ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5) - ; GCN32-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr1, 1 - ; GCN32-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr1, 2 - ; GCN32-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr1, 3 - ; GCN32-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr1, 4 - ; GCN32-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr1, 5 - ; GCN32-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr1, 6 - ; GCN32-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr1, 7 - ; GCN32-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr1, 8 - ; GCN32-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr1, 9 - ; GCN32-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr1, 10 - ; GCN32-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr1, 11 - ; GCN32-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr1, 12 - ; GCN32-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr1, 13 - ; GCN32-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr1, 14 - ; GCN32-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr1, 15 - ; GCN32-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr1, 16 - ; GCN32-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr1, 17 - ; GCN32-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr1, 18 - ; GCN32-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr1, 19 - ; GCN32-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr1, 20 - ; GCN32-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr1, 21 - ; GCN32-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr1, 22 - ; GCN32-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr1, 23 - ; GCN32-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr1, 24 - ; GCN32-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr1, 25 - ; GCN32-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr1, 26 - ; GCN32-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr1, 27 - ; GCN32-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr1, 28 - ; GCN32-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr1, 29 - ; GCN32-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr1, 30 - ; GCN32-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr1, 31 - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5) + ; GCN32-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1 + ; GCN32-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2 + ; GCN32-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3 + ; GCN32-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4 + ; GCN32-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5 + ; GCN32-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6 + ; GCN32-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7 + ; GCN32-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8 + ; GCN32-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9 + ; GCN32-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10 + ; GCN32-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11 + ; GCN32-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12 + ; GCN32-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13 + ; GCN32-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14 + ; GCN32-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15 + ; GCN32-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16 + ; GCN32-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17 + ; GCN32-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18 + ; GCN32-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19 + ; GCN32-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20 + ; GCN32-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21 + ; GCN32-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22 + ; GCN32-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23 + ; GCN32-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24 + ; GCN32-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25 + ; GCN32-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26 + ; GCN32-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27 + ; GCN32-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28 + ; GCN32-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29 + ; GCN32-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30 + ; GCN32-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31 + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -892,30 +892,30 @@ ; GCN64-FLATSCR-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN64-FLATSCR-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc - ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr1, 0 - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 - ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13 - ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr2, 1 - ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2 - ; GCN64-FLATSCR-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr3 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr3, 0, implicit-def $sgpr12_sgpr13_sgpr14 - ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr3, 1 - ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr3, 2 - ; GCN64-FLATSCR-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr3 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13 + ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1 + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2 + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -926,31 +926,31 @@ ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3 ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 - ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr1, 1 - ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr1, 2 - ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr1, 3 - ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr1, 4 - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 - ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr2 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr2, 1 - ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr2, 2 - ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr2, 3 - ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 $vgpr2, 4 - ; GCN64-FLATSCR-NEXT: $sgpr17 = V_READLANE_B32 $vgpr2, 5 - ; GCN64-FLATSCR-NEXT: $sgpr18 = V_READLANE_B32 $vgpr2, 6 - ; GCN64-FLATSCR-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr2, 7 - ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 + ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3 + ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4 + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 + ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3 + ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4 + ; GCN64-FLATSCR-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5 + ; GCN64-FLATSCR-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6 + ; GCN64-FLATSCR-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7 + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -973,44 +973,44 @@ ; GCN64-FLATSCR-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15 ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 - ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr64 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $sgpr65 = V_READLANE_B32 $vgpr1, 1 - ; GCN64-FLATSCR-NEXT: $sgpr66 = V_READLANE_B32 $vgpr1, 2 - ; GCN64-FLATSCR-NEXT: $sgpr67 = V_READLANE_B32 $vgpr1, 3 - ; GCN64-FLATSCR-NEXT: $sgpr68 = V_READLANE_B32 $vgpr1, 4 - ; GCN64-FLATSCR-NEXT: $sgpr69 = V_READLANE_B32 $vgpr1, 5 - ; GCN64-FLATSCR-NEXT: $sgpr70 = V_READLANE_B32 $vgpr1, 6 - ; GCN64-FLATSCR-NEXT: $sgpr71 = V_READLANE_B32 $vgpr1, 7 - ; GCN64-FLATSCR-NEXT: $sgpr72 = V_READLANE_B32 $vgpr1, 8 - ; GCN64-FLATSCR-NEXT: $sgpr73 = V_READLANE_B32 $vgpr1, 9 - ; GCN64-FLATSCR-NEXT: $sgpr74 = V_READLANE_B32 $vgpr1, 10 - ; GCN64-FLATSCR-NEXT: $sgpr75 = V_READLANE_B32 $vgpr1, 11 - ; GCN64-FLATSCR-NEXT: $sgpr76 = V_READLANE_B32 $vgpr1, 12 - ; GCN64-FLATSCR-NEXT: $sgpr77 = V_READLANE_B32 $vgpr1, 13 - ; GCN64-FLATSCR-NEXT: $sgpr78 = V_READLANE_B32 $vgpr1, 14 - ; GCN64-FLATSCR-NEXT: $sgpr79 = V_READLANE_B32 $vgpr1, 15 - ; GCN64-FLATSCR-NEXT: $sgpr80 = V_READLANE_B32 $vgpr1, 16 - ; GCN64-FLATSCR-NEXT: $sgpr81 = V_READLANE_B32 $vgpr1, 17 - ; GCN64-FLATSCR-NEXT: $sgpr82 = V_READLANE_B32 $vgpr1, 18 - ; GCN64-FLATSCR-NEXT: $sgpr83 = V_READLANE_B32 $vgpr1, 19 - ; GCN64-FLATSCR-NEXT: $sgpr84 = V_READLANE_B32 $vgpr1, 20 - ; GCN64-FLATSCR-NEXT: $sgpr85 = V_READLANE_B32 $vgpr1, 21 - ; GCN64-FLATSCR-NEXT: $sgpr86 = V_READLANE_B32 $vgpr1, 22 - ; GCN64-FLATSCR-NEXT: $sgpr87 = V_READLANE_B32 $vgpr1, 23 - ; GCN64-FLATSCR-NEXT: $sgpr88 = V_READLANE_B32 $vgpr1, 24 - ; GCN64-FLATSCR-NEXT: $sgpr89 = V_READLANE_B32 $vgpr1, 25 - ; GCN64-FLATSCR-NEXT: $sgpr90 = V_READLANE_B32 $vgpr1, 26 - ; GCN64-FLATSCR-NEXT: $sgpr91 = V_READLANE_B32 $vgpr1, 27 - ; GCN64-FLATSCR-NEXT: $sgpr92 = V_READLANE_B32 $vgpr1, 28 - ; GCN64-FLATSCR-NEXT: $sgpr93 = V_READLANE_B32 $vgpr1, 29 - ; GCN64-FLATSCR-NEXT: $sgpr94 = V_READLANE_B32 $vgpr1, 30 - ; GCN64-FLATSCR-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr1, 31 - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 + ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-FLATSCR-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2 + ; GCN64-FLATSCR-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3 + ; GCN64-FLATSCR-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4 + ; GCN64-FLATSCR-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5 + ; GCN64-FLATSCR-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6 + ; GCN64-FLATSCR-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7 + ; GCN64-FLATSCR-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8 + ; GCN64-FLATSCR-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9 + ; GCN64-FLATSCR-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10 + ; GCN64-FLATSCR-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11 + ; GCN64-FLATSCR-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12 + ; GCN64-FLATSCR-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13 + ; GCN64-FLATSCR-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14 + ; GCN64-FLATSCR-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15 + ; GCN64-FLATSCR-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16 + ; GCN64-FLATSCR-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17 + ; GCN64-FLATSCR-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18 + ; GCN64-FLATSCR-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19 + ; GCN64-FLATSCR-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20 + ; GCN64-FLATSCR-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21 + ; GCN64-FLATSCR-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22 + ; GCN64-FLATSCR-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23 + ; GCN64-FLATSCR-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24 + ; GCN64-FLATSCR-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25 + ; GCN64-FLATSCR-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26 + ; GCN64-FLATSCR-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27 + ; GCN64-FLATSCR-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28 + ; GCN64-FLATSCR-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29 + ; GCN64-FLATSCR-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30 + ; GCN64-FLATSCR-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31 + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll --- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll @@ -1621,22 +1621,22 @@ ; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 s[14:15], exec +; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: s_mov_b64 exec, 1 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:456 -; GCN-NEXT: v_writelane_b32 v1, s30, 0 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:456 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 +; GCN-NEXT: v_writelane_b32 v0, s30, 0 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[14:15] -; GCN-NEXT: s_mov_b64 s[12:13], exec +; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: s_mov_b64 exec, 1 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 ; GCN-NEXT: v_writelane_b32 v0, s31, 0 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[12:13] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, child_function_ipra@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, child_function_ipra@rel32@hi+12 @@ -1645,15 +1645,15 @@ ; GCN-NEXT: s_mov_b64 s[0:1], s[8:9] ; GCN-NEXT: s_mov_b64 s[2:3], s[10:11] ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: s_mov_b64 s[8:9], exec +; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: s_mov_b64 exec, 1 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:456 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v1, 0 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:456 +; GCN-NEXT: v_readlane_b32 s31, v0, 0 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[8:9] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: s_mov_b64 exec, 1 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 diff --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll --- a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll @@ -26,8 +26,8 @@ ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 s1, 0 -; FLATSCR-NEXT: scratch_load_dword v0, off, s1 offset:8 glc +; FLATSCR-NEXT: s_mov_b32 s0, 0 +; FLATSCR-NEXT: scratch_load_dword v0, off, s0 offset:8 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_movk_i32 s0, 0xffc ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill @@ -79,8 +79,8 @@ ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 s1, 0 -; FLATSCR-NEXT: scratch_load_dword v0, off, s1 offset:8 glc +; FLATSCR-NEXT: s_mov_b32 s0, 0 +; FLATSCR-NEXT: scratch_load_dword v0, off, s0 offset:8 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_movk_i32 s0, 0x1000 ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill @@ -218,10 +218,10 @@ ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 s9, 0 +; FLATSCR-NEXT: s_mov_b32 s8, 0 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, s9 offset:8 glc +; FLATSCR-NEXT: scratch_load_dword v0, off, s8 offset:8 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_movk_i32 s8, 0x1004 ; FLATSCR-NEXT: scratch_store_dword off, v0, s8 ; 4-byte Folded Spill @@ -301,16 +301,17 @@ ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 s2, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s2 offset:12 glc +; FLATSCR-NEXT: s_mov_b32 s0, 0 +; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 offset:12 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_movk_i32 s0, 0xff8 -; FLATSCR-NEXT: s_mov_b32 s1, 0 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 ; 8-byte Folded Spill +; FLATSCR-NEXT: s_mov_b32 s0, 0 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, s1 offset:8 glc +; FLATSCR-NEXT: scratch_load_dword v0, off, s0 offset:8 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) +; FLATSCR-NEXT: s_movk_i32 s0, 0xff8 ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART @@ -367,16 +368,17 @@ ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 s2, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s2 offset:12 glc +; FLATSCR-NEXT: s_mov_b32 s0, 0 +; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 offset:12 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_movk_i32 s0, 0xffc -; FLATSCR-NEXT: s_mov_b32 s1, 0 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 ; 8-byte Folded Spill +; FLATSCR-NEXT: s_mov_b32 s0, 0 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, s1 offset:8 glc +; FLATSCR-NEXT: scratch_load_dword v0, off, s0 offset:8 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) +; FLATSCR-NEXT: s_movk_i32 s0, 0xffc ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll --- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -10094,7 +10094,6 @@ ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:240 ; GFX6-NEXT: s_addc_u32 s41, s41, 0 ; GFX6-NEXT: s_mov_b32 s2, 0x83400 -; GFX6-NEXT: s_mov_b64 s[44:45], exec ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt vmcnt(0) @@ -10215,6 +10214,7 @@ ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 13, v0 ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 16, v4 ; GFX6-NEXT: v_mov_b32_e32 v7, 1 +; GFX6-NEXT: s_mov_b64 s[2:3], exec ; GFX6-NEXT: buffer_store_dword v7, v4, s[40:43], 0 offen ; GFX6-NEXT: ;;#ASMSTART ; GFX6-NEXT: ; def s[4:11] @@ -10230,12 +10230,12 @@ ; GFX6-NEXT: v_writelane_b32 v4, s9, 5 ; GFX6-NEXT: v_writelane_b32 v4, s10, 6 ; GFX6-NEXT: v_writelane_b32 v4, s11, 7 -; GFX6-NEXT: s_mov_b32 s2, 0x83800 -; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill +; GFX6-NEXT: s_mov_b32 s12, 0x83800 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s12 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 exec, s[2:3] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX6-NEXT: ;;#ASMSTART ; GFX6-NEXT: ; def s[8:15] @@ -10263,251 +10263,251 @@ ; GFX6-NEXT: ; %bb.1: ; %bb0 ; GFX6-NEXT: s_mov_b64 s[44:45], exec ; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], 0 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v9, s8, 0 -; GFX6-NEXT: v_writelane_b32 v9, s9, 1 -; GFX6-NEXT: v_writelane_b32 v9, s10, 2 -; GFX6-NEXT: v_writelane_b32 v9, s11, 3 -; GFX6-NEXT: v_writelane_b32 v9, s12, 4 -; GFX6-NEXT: v_writelane_b32 v9, s13, 5 -; GFX6-NEXT: v_writelane_b32 v9, s14, 6 -; GFX6-NEXT: v_writelane_b32 v9, s15, 7 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2100 -; GFX6-NEXT: buffer_store_dword v9, v4, s[40:43], 0 offen ; 4-byte Folded Spill +; GFX6-NEXT: v_writelane_b32 v4, s8, 0 +; GFX6-NEXT: v_writelane_b32 v4, s9, 1 +; GFX6-NEXT: v_writelane_b32 v4, s10, 2 +; GFX6-NEXT: v_writelane_b32 v4, s11, 3 +; GFX6-NEXT: v_writelane_b32 v4, s12, 4 +; GFX6-NEXT: v_writelane_b32 v4, s13, 5 +; GFX6-NEXT: v_writelane_b32 v4, s14, 6 +; GFX6-NEXT: v_writelane_b32 v4, s15, 7 +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2100 +; GFX6-NEXT: buffer_store_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], 0 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[44:45] ; GFX6-NEXT: s_mov_b64 s[44:45], exec ; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: v_mov_b32_e32 v4, 0x20e0 -; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], 0 +; GFX6-NEXT: v_mov_b32_e32 v7, 0x20e0 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s8, v8, 0 -; GFX6-NEXT: v_readlane_b32 s9, v8, 1 -; GFX6-NEXT: v_readlane_b32 s10, v8, 2 -; GFX6-NEXT: v_readlane_b32 s11, v8, 3 -; GFX6-NEXT: v_readlane_b32 s12, v8, 4 -; GFX6-NEXT: v_readlane_b32 s13, v8, 5 -; GFX6-NEXT: v_readlane_b32 s14, v8, 6 -; GFX6-NEXT: v_readlane_b32 s15, v8, 7 -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], 0 +; GFX6-NEXT: v_readlane_b32 s8, v4, 0 +; GFX6-NEXT: v_readlane_b32 s9, v4, 1 +; GFX6-NEXT: v_readlane_b32 s10, v4, 2 +; GFX6-NEXT: v_readlane_b32 s11, v4, 3 +; GFX6-NEXT: v_readlane_b32 s12, v4, 4 +; GFX6-NEXT: v_readlane_b32 s13, v4, 5 +; GFX6-NEXT: v_readlane_b32 s14, v4, 6 +; GFX6-NEXT: v_readlane_b32 s15, v4, 7 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[44:45] ; GFX6-NEXT: s_mov_b64 s[44:45], exec ; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v7, s16, 0 -; GFX6-NEXT: v_writelane_b32 v7, s17, 1 -; GFX6-NEXT: v_writelane_b32 v7, s18, 2 -; GFX6-NEXT: v_writelane_b32 v7, s19, 3 -; GFX6-NEXT: v_writelane_b32 v7, s20, 4 -; GFX6-NEXT: v_writelane_b32 v7, s21, 5 -; GFX6-NEXT: v_writelane_b32 v7, s22, 6 -; GFX6-NEXT: v_writelane_b32 v7, s23, 7 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2120 -; GFX6-NEXT: buffer_store_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Spill +; GFX6-NEXT: v_writelane_b32 v4, s16, 0 +; GFX6-NEXT: v_writelane_b32 v4, s17, 1 +; GFX6-NEXT: v_writelane_b32 v4, s18, 2 +; GFX6-NEXT: v_writelane_b32 v4, s19, 3 +; GFX6-NEXT: v_writelane_b32 v4, s20, 4 +; GFX6-NEXT: v_writelane_b32 v4, s21, 5 +; GFX6-NEXT: v_writelane_b32 v4, s22, 6 +; GFX6-NEXT: v_writelane_b32 v4, s23, 7 +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2120 +; GFX6-NEXT: buffer_store_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[44:45] ; GFX6-NEXT: s_mov_b64 s[44:45], exec ; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2100 -; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], 0 +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2100 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, v4, s[40:43], 0 offen ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s16, v9, 0 -; GFX6-NEXT: v_readlane_b32 s17, v9, 1 -; GFX6-NEXT: v_readlane_b32 s18, v9, 2 -; GFX6-NEXT: v_readlane_b32 s19, v9, 3 -; GFX6-NEXT: v_readlane_b32 s20, v9, 4 -; GFX6-NEXT: v_readlane_b32 s21, v9, 5 -; GFX6-NEXT: v_readlane_b32 s22, v9, 6 -; GFX6-NEXT: v_readlane_b32 s23, v9, 7 -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], 0 +; GFX6-NEXT: v_readlane_b32 s16, v4, 0 +; GFX6-NEXT: v_readlane_b32 s17, v4, 1 +; GFX6-NEXT: v_readlane_b32 s18, v4, 2 +; GFX6-NEXT: v_readlane_b32 s19, v4, 3 +; GFX6-NEXT: v_readlane_b32 s20, v4, 4 +; GFX6-NEXT: v_readlane_b32 s21, v4, 5 +; GFX6-NEXT: v_readlane_b32 s22, v4, 6 +; GFX6-NEXT: v_readlane_b32 s23, v4, 7 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[44:45] ; GFX6-NEXT: s_mov_b64 s[44:45], exec ; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], 0 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v8, s24, 0 -; GFX6-NEXT: v_writelane_b32 v8, s25, 1 -; GFX6-NEXT: v_writelane_b32 v8, s26, 2 -; GFX6-NEXT: v_writelane_b32 v8, s27, 3 -; GFX6-NEXT: v_writelane_b32 v8, s28, 4 -; GFX6-NEXT: v_writelane_b32 v8, s29, 5 -; GFX6-NEXT: v_writelane_b32 v8, s30, 6 -; GFX6-NEXT: v_writelane_b32 v8, s31, 7 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2140 -; GFX6-NEXT: buffer_store_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Spill +; GFX6-NEXT: v_writelane_b32 v4, s24, 0 +; GFX6-NEXT: v_writelane_b32 v4, s25, 1 +; GFX6-NEXT: v_writelane_b32 v4, s26, 2 +; GFX6-NEXT: v_writelane_b32 v4, s27, 3 +; GFX6-NEXT: v_writelane_b32 v4, s28, 4 +; GFX6-NEXT: v_writelane_b32 v4, s29, 5 +; GFX6-NEXT: v_writelane_b32 v4, s30, 6 +; GFX6-NEXT: v_writelane_b32 v4, s31, 7 +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2140 +; GFX6-NEXT: buffer_store_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], 0 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[44:45] ; GFX6-NEXT: s_mov_b64 s[44:45], exec ; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2120 -; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2120 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s24, v7, 0 -; GFX6-NEXT: v_readlane_b32 s25, v7, 1 -; GFX6-NEXT: v_readlane_b32 s26, v7, 2 -; GFX6-NEXT: v_readlane_b32 s27, v7, 3 -; GFX6-NEXT: v_readlane_b32 s28, v7, 4 -; GFX6-NEXT: v_readlane_b32 s29, v7, 5 -; GFX6-NEXT: v_readlane_b32 s30, v7, 6 -; GFX6-NEXT: v_readlane_b32 s31, v7, 7 -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: v_readlane_b32 s24, v4, 0 +; GFX6-NEXT: v_readlane_b32 s25, v4, 1 +; GFX6-NEXT: v_readlane_b32 s26, v4, 2 +; GFX6-NEXT: v_readlane_b32 s27, v4, 3 +; GFX6-NEXT: v_readlane_b32 s28, v4, 4 +; GFX6-NEXT: v_readlane_b32 s29, v4, 5 +; GFX6-NEXT: v_readlane_b32 s30, v4, 6 +; GFX6-NEXT: v_readlane_b32 s31, v4, 7 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[44:45] ; GFX6-NEXT: s_mov_b64 s[44:45], exec ; GFX6-NEXT: s_mov_b64 exec, 15 -; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], 0 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v10, s0, 0 -; GFX6-NEXT: v_writelane_b32 v10, s1, 1 -; GFX6-NEXT: v_writelane_b32 v10, s2, 2 -; GFX6-NEXT: v_writelane_b32 v10, s3, 3 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2160 -; GFX6-NEXT: buffer_store_dword v10, v4, s[40:43], 0 offen ; 4-byte Folded Spill +; GFX6-NEXT: v_writelane_b32 v4, s0, 0 +; GFX6-NEXT: v_writelane_b32 v4, s1, 1 +; GFX6-NEXT: v_writelane_b32 v4, s2, 2 +; GFX6-NEXT: v_writelane_b32 v4, s3, 3 +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2160 +; GFX6-NEXT: buffer_store_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], 0 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 s[0:1], exec ; GFX6-NEXT: s_mov_b64 exec, 15 -; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], 0 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v8, s4, 0 -; GFX6-NEXT: v_writelane_b32 v8, s5, 1 -; GFX6-NEXT: v_writelane_b32 v8, s6, 2 -; GFX6-NEXT: v_writelane_b32 v8, s7, 3 -; GFX6-NEXT: s_mov_b32 s0, 0x85c00 -; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], s0 ; 4-byte Folded Spill +; GFX6-NEXT: v_writelane_b32 v4, s4, 0 +; GFX6-NEXT: v_writelane_b32 v4, s5, 1 +; GFX6-NEXT: v_writelane_b32 v4, s6, 2 +; GFX6-NEXT: v_writelane_b32 v4, s7, 3 +; GFX6-NEXT: s_mov_b32 s44, 0x85c00 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s44 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], 0 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 exec, s[0:1] ; GFX6-NEXT: s_mov_b64 s[0:1], exec ; GFX6-NEXT: s_mov_b64 exec, 3 -; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], 0 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v9, s2, 0 -; GFX6-NEXT: v_writelane_b32 v9, s3, 1 +; GFX6-NEXT: v_writelane_b32 v4, s2, 0 +; GFX6-NEXT: v_writelane_b32 v4, s3, 1 ; GFX6-NEXT: s_mov_b32 s4, 0x86600 -; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], s4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s4 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], 0 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[0:1] ; GFX6-NEXT: s_mov_b64 s[44:45], exec ; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2140 -; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2140 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s0, v7, 0 -; GFX6-NEXT: v_readlane_b32 s1, v7, 1 -; GFX6-NEXT: v_readlane_b32 s2, v7, 2 -; GFX6-NEXT: v_readlane_b32 s3, v7, 3 -; GFX6-NEXT: v_readlane_b32 s4, v7, 4 -; GFX6-NEXT: v_readlane_b32 s5, v7, 5 -; GFX6-NEXT: v_readlane_b32 s6, v7, 6 -; GFX6-NEXT: v_readlane_b32 s7, v7, 7 -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: v_readlane_b32 s0, v4, 0 +; GFX6-NEXT: v_readlane_b32 s1, v4, 1 +; GFX6-NEXT: v_readlane_b32 s2, v4, 2 +; GFX6-NEXT: v_readlane_b32 s3, v4, 3 +; GFX6-NEXT: v_readlane_b32 s4, v4, 4 +; GFX6-NEXT: v_readlane_b32 s5, v4, 5 +; GFX6-NEXT: v_readlane_b32 s6, v4, 6 +; GFX6-NEXT: v_readlane_b32 s7, v4, 7 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[44:45] ; GFX6-NEXT: s_mov_b64 s[44:45], exec ; GFX6-NEXT: s_mov_b64 exec, 15 -; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], 0 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v8, s36, 0 -; GFX6-NEXT: v_writelane_b32 v8, s37, 1 -; GFX6-NEXT: v_writelane_b32 v8, s38, 2 -; GFX6-NEXT: v_writelane_b32 v8, s39, 3 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2180 -; GFX6-NEXT: buffer_store_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Spill +; GFX6-NEXT: v_writelane_b32 v4, s36, 0 +; GFX6-NEXT: v_writelane_b32 v4, s37, 1 +; GFX6-NEXT: v_writelane_b32 v4, s38, 2 +; GFX6-NEXT: v_writelane_b32 v4, s39, 3 +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2180 +; GFX6-NEXT: buffer_store_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], 0 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[44:45] ; GFX6-NEXT: s_mov_b64 s[38:39], exec ; GFX6-NEXT: s_mov_b64 exec, 3 -; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], 0 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v10, s36, 0 -; GFX6-NEXT: v_writelane_b32 v10, s37, 1 +; GFX6-NEXT: v_writelane_b32 v4, s36, 0 +; GFX6-NEXT: v_writelane_b32 v4, s37, 1 ; GFX6-NEXT: s_mov_b32 s44, 0x86400 -; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], s44 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s44 ; 4-byte Folded Spill ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], 0 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[38:39] ; GFX6-NEXT: s_mov_b64 s[44:45], exec ; GFX6-NEXT: s_mov_b64 exec, 15 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2170 -; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], 0 +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2170 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, v4, s[40:43], 0 offen ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s36, v9, 0 -; GFX6-NEXT: v_readlane_b32 s37, v9, 1 -; GFX6-NEXT: v_readlane_b32 s38, v9, 2 -; GFX6-NEXT: v_readlane_b32 s39, v9, 3 -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], 0 +; GFX6-NEXT: v_readlane_b32 s36, v4, 0 +; GFX6-NEXT: v_readlane_b32 s37, v4, 1 +; GFX6-NEXT: v_readlane_b32 s38, v4, 2 +; GFX6-NEXT: v_readlane_b32 s39, v4, 3 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[44:45] ; GFX6-NEXT: s_not_b64 exec, exec -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2190 -; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2190 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload ; GFX6-NEXT: s_not_b64 exec, exec -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2190 -; GFX6-NEXT: buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2190 +; GFX6-NEXT: buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload ; GFX6-NEXT: s_not_b64 exec, exec ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s44, v7, 0 -; GFX6-NEXT: v_readlane_b32 s45, v7, 1 -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: v_readlane_b32 s44, v4, 0 +; GFX6-NEXT: v_readlane_b32 s45, v4, 1 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_not_b64 exec, exec ; GFX6-NEXT: s_mov_b64 vcc, s[34:35] ; GFX6-NEXT: s_not_b64 exec, exec -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2198 -; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], 0 +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2198 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload ; GFX6-NEXT: s_not_b64 exec, exec -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2198 -; GFX6-NEXT: buffer_load_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Reload +; GFX6-NEXT: v_mov_b32_e32 v7, 0x2198 +; GFX6-NEXT: buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload ; GFX6-NEXT: s_not_b64 exec, exec ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s34, v8, 0 -; GFX6-NEXT: v_readlane_b32 s35, v8, 1 -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], 0 +; GFX6-NEXT: v_readlane_b32 s34, v4, 0 +; GFX6-NEXT: v_readlane_b32 s35, v4, 1 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_not_b64 exec, exec ; GFX6-NEXT: ;;#ASMSTART ; GFX6-NEXT: ; use s[8:15],s[16:23],s[24:31],s[0:7],s[36:39],s[34:35],s[44:45] ; GFX6-NEXT: ;;#ASMEND ; GFX6-NEXT: s_mov_b64 s[34:35], vcc -; GFX6-NEXT: s_mov_b64 s[8:9], exec +; GFX6-NEXT: s_mov_b64 s[0:1], exec ; GFX6-NEXT: s_mov_b64 exec, 15 -; GFX6-NEXT: s_mov_b32 s0, 0x86000 +; GFX6-NEXT: s_mov_b32 s2, 0x86000 ; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s0 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s2 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: v_readlane_b32 s36, v4, 0 ; GFX6-NEXT: v_readlane_b32 s37, v4, 1 @@ -10515,19 +10515,19 @@ ; GFX6-NEXT: v_readlane_b32 s39, v4, 3 ; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[8:9] +; GFX6-NEXT: s_mov_b64 exec, s[0:1] ; GFX6-NEXT: s_mov_b64 s[4:5], exec ; GFX6-NEXT: s_mov_b64 exec, 15 ; GFX6-NEXT: s_mov_b32 s6, 0x85800 -; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s6 ; 4-byte Folded Reload ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s0, v7, 0 -; GFX6-NEXT: v_readlane_b32 s1, v7, 1 -; GFX6-NEXT: v_readlane_b32 s2, v7, 2 -; GFX6-NEXT: v_readlane_b32 s3, v7, 3 -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: v_readlane_b32 s0, v4, 0 +; GFX6-NEXT: v_readlane_b32 s1, v4, 1 +; GFX6-NEXT: v_readlane_b32 s2, v4, 2 +; GFX6-NEXT: v_readlane_b32 s3, v4, 3 +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_mov_b64 exec, s[4:5] ; GFX6-NEXT: s_mov_b32 s2, 0x83800 diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir --- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir @@ -48,23 +48,23 @@ ; GFX9-NEXT: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9-NEXT: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9-NEXT: $vcc = IMPLICIT_DEF - ; GFX9-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr2, implicit $vcc - ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr2, implicit $vcc - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX9-NEXT: $vcc = IMPLICIT_DEF - ; GFX9-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GFX9-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc - ; GFX9-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1 + ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -84,23 +84,23 @@ ; GFX10-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr9, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10-NEXT: $vcc = IMPLICIT_DEF - ; GFX10-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GFX10-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr2, implicit $vcc - ; GFX10-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr2, implicit $vcc - ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GFX10-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc + ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX10-NEXT: $vcc = IMPLICIT_DEF - ; GFX10-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr1 - ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GFX10-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc - ; GFX10-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc - ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1 + ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc + ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -114,23 +114,23 @@ ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: $sgpr33 = S_MOV_B32 0 ; GFX11-NEXT: $vcc = IMPLICIT_DEF - ; GFX11-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GFX11-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr2, implicit $vcc - ; GFX11-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr2, implicit $vcc - ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - ; GFX11-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX11-NEXT: $vcc = IMPLICIT_DEF - ; GFX11-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GFX11-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc - ; GFX11-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc - ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - ; GFX11-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1 + ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -68,9 +68,10 @@ ; FLATSCR-NEXT: s_and_saveexec_b32 s0, vcc_lo ; FLATSCR-NEXT: s_cbranch_execz .LBB0_2 ; FLATSCR-NEXT: ; %bb.1: ; %if.then4.i -; FLATSCR-NEXT: s_movk_i32 s1, 0x4000 +; FLATSCR-NEXT: s_movk_i32 s0, 0x4000 +; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 offset:4 +; FLATSCR-NEXT: s_waitcnt_depctr 0xffe3 ; FLATSCR-NEXT: s_mov_b32 s0, 0x41c64e6d -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s1 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: v_add_nc_u32_e32 v0, v1, v0 ; FLATSCR-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039 @@ -95,9 +96,9 @@ ; MUBUF11-NEXT: v_cmpx_ne_u32_e32 0, v0 ; MUBUF11-NEXT: s_cbranch_execz .LBB0_2 ; MUBUF11-NEXT: ; %bb.1: ; %if.then4.i -; MUBUF11-NEXT: s_movk_i32 s1, 0x4000 +; MUBUF11-NEXT: s_movk_i32 s0, 0x4000 +; MUBUF11-NEXT: scratch_load_b64 v[0:1], off, s0 offset:4 ; MUBUF11-NEXT: s_mov_b32 s0, 0x41c64e6d -; MUBUF11-NEXT: scratch_load_b64 v[0:1], off, s1 offset:4 ; MUBUF11-NEXT: s_waitcnt vmcnt(0) ; MUBUF11-NEXT: v_add_nc_u32_e32 v2, v1, v0 ; MUBUF11-NEXT: v_mad_u64_u32 v[0:1], null, v2, s0, 0x3039 @@ -123,9 +124,9 @@ ; FLATSCR11-NEXT: v_cmpx_ne_u32_e32 0, v0 ; FLATSCR11-NEXT: s_cbranch_execz .LBB0_2 ; FLATSCR11-NEXT: ; %bb.1: ; %if.then4.i -; FLATSCR11-NEXT: s_movk_i32 s1, 0x4000 +; FLATSCR11-NEXT: s_movk_i32 s0, 0x4000 +; FLATSCR11-NEXT: scratch_load_b64 v[0:1], off, s0 offset:4 ; FLATSCR11-NEXT: s_mov_b32 s0, 0x41c64e6d -; FLATSCR11-NEXT: scratch_load_b64 v[0:1], off, s1 offset:4 ; FLATSCR11-NEXT: s_waitcnt vmcnt(0) ; FLATSCR11-NEXT: v_add_nc_u32_e32 v2, v1, v0 ; FLATSCR11-NEXT: v_mad_u64_u32 v[0:1], null, v2, s0, 0x3039