diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -68,6 +68,8 @@ CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const override; + unsigned getRegisterCostTableIndex(const MachineFunction &MF) const override; + // Stack access is very expensive. CSRs are also the high registers, and we // want to minimize the number of used registers. unsigned getCSRFirstUseCost() const override { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -149,6 +149,20 @@ } } +unsigned +SIRegisterInfo::getRegisterCostTableIndex(const MachineFunction &MF) const { + // Picks the CostPerUse index for MF. Index 0: default, no per-register cost. + // Index 1: VGPRs above v31 get CostPerUse = register_index / 4 (allocation + // granularity); v0-v31 stay at 0. + const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); + + // Kernels with no calling convention: first CSR entry is NoRegister. 
+ if (CSRegs && *CSRegs == AMDGPU::NoRegister) + return 0; + + return 1; +} + const uint32_t *SIRegisterInfo::getNoPreservedMask() const { return CSR_AMDGPU_NoRegs_RegMask; } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -281,9 +281,11 @@ // VGPR registers foreach Index = 0...255 in { - defm VGPR#Index : - SIRegLoHi16 <"v"#Index, Index, 0, 1>, - DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]>; + let CostPerUse=[0, !if(!gt(Index, 31), !srl(Index, 2), 0)] in { + defm VGPR#Index : + SIRegLoHi16 <"v"#Index, Index, 0, 1>, + DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]>; + } } // AccVGPR registers diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -9,7 +9,7 @@ ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0 -; GCN-NEXT: v_add_co_u32_e32 v12, vcc, 64, v0 +; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0 ; GCN-NEXT: s_mov_b32 s6, s33 ; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill @@ -25,285 +25,316 @@ ; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc -; GCN-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:48 -; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: v_addc_co_u32_e32 v4, 
vcc, 0, v1, vcc ; GCN-NEXT: s_movk_i32 s4, 0x80 -; GCN-NEXT: v_mov_b32_e32 v17, s5 -; GCN-NEXT: v_mov_b32_e32 v16, s4 +; GCN-NEXT: global_load_dwordx4 v[7:10], v[3:4], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[11:14], v[3:4], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[15:18], v[3:4], off offset:48 +; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_mov_b32_e32 v4, s5 +; GCN-NEXT: v_add_co_u32_e32 v52, vcc, v0, v3 ; GCN-NEXT: s_movk_i32 s4, 0xc0 -; GCN-NEXT: s_add_u32 s32, s32, 0x10000 -; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:636 ; 
4-byte Folded Spill -; GCN-NEXT: v_lshrrev_b32_e64 v3, 6, s33 -; GCN-NEXT: v_add_u32_e32 v3, 0x100, v3 -; GCN-NEXT: v_add_u32_e32 v60, 16, v3 -; GCN-NEXT: v_add_co_u32_e32 v52, vcc, v0, v16 -; GCN-NEXT: v_addc_co_u32_e32 v53, vcc, v1, v17, vcc -; GCN-NEXT: v_mov_b32_e32 v17, s5 -; GCN-NEXT: v_mov_b32_e32 v16, s4 -; GCN-NEXT: v_add_co_u32_e32 v56, vcc, v0, v16 -; GCN-NEXT: v_addc_co_u32_e32 v57, vcc, v1, v17, vcc -; GCN-NEXT: global_load_dwordx4 v[16:19], v[0:1], off +; GCN-NEXT: v_addc_co_u32_e32 v53, vcc, v1, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_mov_b32_e32 v4, s5 +; GCN-NEXT: v_add_co_u32_e32 v56, vcc, v0, v3 +; GCN-NEXT: v_addc_co_u32_e32 v57, vcc, v1, v4, vcc +; GCN-NEXT: global_load_dwordx4 v[3:6], v[0:1], off +; GCN-NEXT: v_lshrrev_b32_e64 v19, 6, s33 +; GCN-NEXT: v_add_u32_e32 v19, 0x100, v19 +; GCN-NEXT: v_add_u32_e32 v60, 16, v19 +; GCN-NEXT: s_add_u32 s32, s32, 0x14000 +; GCN-NEXT: s_sub_u32 s32, s32, 0x14000 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:704 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:708 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:712 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:716 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:720 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:724 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:728 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:732 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:736 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:740 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:744 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword 
v14, off, s[0:3], s33 offset:748 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:752 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:756 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:760 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:764 ; 4-byte Folded Spill ; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 ; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:32 ; GCN-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:64 -; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:128 -; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:192 -; GCN-NEXT: global_load_dwordx4 v[44:47], v[52:53], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[48:51], v[52:53], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[3:6], v[0:1], off offset:64 +; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:128 +; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:192 +; GCN-NEXT: v_add_u32_e32 v0, 20, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:672 ; 4-byte 
Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[48:51], v[52:53], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[36:39], v[52:53], off offset:32 ; GCN-NEXT: global_load_dwordx4 v[52:55], v[52:53], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[12:15], v[56:57], off offset:16 -; GCN-NEXT: v_add_u32_e32 v0, 20, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 
offset:684 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[12:15], v[56:57], off offset:32 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[40:43], v[56:57], off offset:16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; 
GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[44:47], v[56:57], off offset:32 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: 
buffer_store_dword v41, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill ; GCN-NEXT: global_load_dwordx4 v[56:59], v[56:57], off offset:48 ; GCN-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 24, v3 +; GCN-NEXT: v_add_u32_e32 v0, 24, v19 ; GCN-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 28, v3 +; GCN-NEXT: v_add_u32_e32 v0, 28, v19 ; GCN-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 32, v3 +; GCN-NEXT: v_add_u32_e32 v0, 32, v19 ; GCN-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 36, v3 +; GCN-NEXT: v_add_u32_e32 v0, 36, v19 ; GCN-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 40, v3 +; GCN-NEXT: v_add_u32_e32 v0, 40, v19 ; GCN-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 44, v3 +; GCN-NEXT: v_add_u32_e32 v0, 44, v19 ; GCN-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 48, v3 +; GCN-NEXT: v_add_u32_e32 v0, 48, v19 ; GCN-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen -; 
GCN-NEXT: v_add_u32_e32 v0, 52, v3 +; GCN-NEXT: v_add_u32_e32 v0, 52, v19 ; GCN-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 56, v3 +; GCN-NEXT: v_add_u32_e32 v0, 56, v19 ; GCN-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 60, v3 +; GCN-NEXT: v_add_u32_e32 v0, 60, v19 ; GCN-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 64, v3 -; GCN-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x44, v3 -; GCN-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x48, v3 -; GCN-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x4c, v3 -; GCN-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x50, v3 -; GCN-NEXT: buffer_store_dword v20, v60, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 64, v19 +; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x44, v19 ; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x54, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x48, v19 ; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x58, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x4c, v19 ; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x5c, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x50, v19 ; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x60, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x54, v19 ; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x64, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x58, v19 ; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x68, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x5c, v19 ; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x6c, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x60, v19 ; GCN-NEXT: buffer_store_dword 
v11, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v35, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0x70, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v12, v32 +; GCN-NEXT: v_add_u32_e32 v0, 0x64, v19 ; GCN-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v13, v33 -; GCN-NEXT: v_add_u32_e32 v0, 0x74, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x68, v19 ; GCN-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v14, v34 -; GCN-NEXT: v_add_u32_e32 v0, 0x78, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x6c, v19 ; GCN-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v15, 
v35 -; GCN-NEXT: v_add_u32_e32 v0, 0x7c, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x70, v19 ; GCN-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x80, v3 -; GCN-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x84, v3 -; GCN-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x88, v3 -; GCN-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x8c, v3 -; GCN-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x90, v3 -; GCN-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x94, v3 -; GCN-NEXT: buffer_store_dword v45, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x98, v3 -; GCN-NEXT: buffer_store_dword v46, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x9c, v3 -; GCN-NEXT: buffer_store_dword v47, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa0, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x74, v19 +; GCN-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x78, v19 +; GCN-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x7c, v19 +; GCN-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x80, v19 +; GCN-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x84, v19 +; GCN-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x88, v19 +; GCN-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x8c, v19 +; GCN-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x90, v19 +; GCN-NEXT: buffer_store_dword v20, v60, s[0:3], 0 offen ; GCN-NEXT: buffer_store_dword v48, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa4, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x94, v19 ; GCN-NEXT: buffer_store_dword v49, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa8, v3 +; GCN-NEXT: 
v_add_u32_e32 v0, 0x98, v19 ; GCN-NEXT: buffer_store_dword v50, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xac, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x9c, v19 ; GCN-NEXT: buffer_store_dword v51, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb0, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xa0, v19 +; GCN-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa4, v19 +; GCN-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa8, v19 +; GCN-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xac, v19 +; GCN-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xb0, v19 ; GCN-NEXT: buffer_store_dword v52, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb4, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xb4, v19 ; GCN-NEXT: buffer_store_dword v53, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb8, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xb8, v19 ; GCN-NEXT: buffer_store_dword v54, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xbc, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xbc, v19 ; GCN-NEXT: buffer_store_dword v55, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc0, v3 -; GCN-NEXT: buffer_store_dword v40, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc4, v3 -; GCN-NEXT: buffer_store_dword v41, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc8, v3 -; GCN-NEXT: buffer_store_dword v42, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xcc, v3 -; GCN-NEXT: buffer_store_dword v43, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 4, v3 -; GCN-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 8, v3 -; GCN-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 12, v3 -; GCN-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:256 -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload -; GCN-NEXT: 
buffer_load_dword v5, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0xd0, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, v8 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload +; 
GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0xc0, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc4, v19 +; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc8, v19 +; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xcc, v19 +; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:704 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:708 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:712 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:716 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:720 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:724 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:728 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:732 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:736 ; 4-byte Folded Reload +; GCN-NEXT: 
buffer_load_dword v12, off, s[0:3], s33 offset:740 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:744 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:748 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:752 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:756 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:760 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:764 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 4, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 8, v19 +; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 12, v19 +; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:620 ; 4-byte 
Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0xd0, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v4, v7 ; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v5, v9 -; GCN-NEXT: v_add_u32_e32 v0, 0xd4, v3 +; GCN-NEXT: v_mov_b32_e32 v5, v8 +; GCN-NEXT: v_add_u32_e32 v0, 0xd4, v19 +; GCN-NEXT: v_mov_b32_e32 v6, v9 ; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v6, v10 -; GCN-NEXT: v_add_u32_e32 v0, 0xd8, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xd8, v19 ; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v7, v11 -; GCN-NEXT: v_add_u32_e32 v0, 0xdc, v3 +; GCN-NEXT: v_mov_b32_e32 v7, v10 +; GCN-NEXT: v_add_u32_e32 v0, 0xdc, v19 ; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:548 ; 4-byte Folded 
Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0xe0, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v8, v12 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:572 ; 
4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0xe0, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v11 ; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v9, v13 -; GCN-NEXT: v_add_u32_e32 v0, 0xe4, v3 +; GCN-NEXT: v_mov_b32_e32 v9, v12 +; GCN-NEXT: v_add_u32_e32 v0, 0xe4, v19 +; GCN-NEXT: v_mov_b32_e32 v10, v13 ; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v10, v14 -; GCN-NEXT: v_add_u32_e32 v0, 0xe8, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xe8, v19 ; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v11, v15 -; GCN-NEXT: v_add_u32_e32 v0, 0xec, v3 +; GCN-NEXT: v_mov_b32_e32 v11, v14 +; GCN-NEXT: v_add_u32_e32 v0, 0xec, v19 ; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xf0, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xf0, v19 ; GCN-NEXT: buffer_store_dword v56, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xf4, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xf4, v19 ; GCN-NEXT: buffer_store_dword v57, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xf8, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xf8, v19 ; GCN-NEXT: buffer_store_dword v58, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xfc, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xfc, v19 ; GCN-NEXT: buffer_store_dword v59, v0, s[0:3], 0 offen ; GCN-NEXT: v_and_b32_e32 v0, 63, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GCN-NEXT: v_add_u32_e32 v0, v3, v0 +; GCN-NEXT: v_add_u32_e32 v0, v19, v0 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload @@ -331,7 +362,7 @@ ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0 -; GCN-NEXT: v_add_co_u32_e32 v12, vcc, 64, v0 +; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0 ; GCN-NEXT: s_mov_b32 s6, s33 ; GCN-NEXT: s_and_b32 s33, 
s4, 0xffffc000 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill @@ -347,285 +378,316 @@ ; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc -; GCN-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:48 -; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v1, vcc ; GCN-NEXT: s_movk_i32 s4, 0x80 -; GCN-NEXT: v_mov_b32_e32 v17, s5 -; GCN-NEXT: v_mov_b32_e32 v16, s4 +; GCN-NEXT: global_load_dwordx4 v[7:10], v[3:4], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[11:14], v[3:4], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[15:18], v[3:4], off offset:48 +; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_mov_b32_e32 v4, s5 +; GCN-NEXT: v_add_co_u32_e32 v52, vcc, v0, v3 ; GCN-NEXT: s_movk_i32 s4, 0xc0 -; GCN-NEXT: s_add_u32 s32, s32, 0x10000 -; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:604 ; 4-byte Folded 
Spill -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill -; GCN-NEXT: v_lshrrev_b32_e64 v3, 6, s33 -; GCN-NEXT: v_add_u32_e32 v3, 0x100, v3 -; GCN-NEXT: v_add_u32_e32 v60, 16, v3 -; GCN-NEXT: v_add_co_u32_e32 v52, vcc, v0, v16 -; GCN-NEXT: v_addc_co_u32_e32 v53, vcc, v1, v17, vcc -; GCN-NEXT: v_mov_b32_e32 v17, s5 -; GCN-NEXT: v_mov_b32_e32 v16, s4 -; GCN-NEXT: v_add_co_u32_e32 v56, vcc, v0, v16 -; GCN-NEXT: v_addc_co_u32_e32 v57, vcc, v1, v17, vcc -; GCN-NEXT: global_load_dwordx4 v[16:19], v[0:1], off +; GCN-NEXT: v_addc_co_u32_e32 v53, vcc, v1, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_mov_b32_e32 v4, s5 +; GCN-NEXT: v_add_co_u32_e32 v56, vcc, v0, v3 +; GCN-NEXT: v_addc_co_u32_e32 v57, vcc, v1, v4, vcc +; GCN-NEXT: global_load_dwordx4 v[3:6], v[0:1], off +; GCN-NEXT: v_lshrrev_b32_e64 v19, 6, s33 +; GCN-NEXT: v_add_u32_e32 v19, 0x100, v19 +; GCN-NEXT: v_add_u32_e32 v60, 16, v19 +; GCN-NEXT: s_add_u32 s32, s32, 0x14000 +; GCN-NEXT: s_sub_u32 s32, s32, 0x14000 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:704 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:708 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:712 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v6, off, 
s[0:3], s33 offset:716 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:720 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:724 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:728 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:732 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:736 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:740 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:744 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:748 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:752 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:756 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:760 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:764 ; 4-byte Folded Spill ; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 ; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:32 ; GCN-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:64 -; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:128 -; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:192 -; GCN-NEXT: global_load_dwordx4 v[44:47], v[52:53], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[48:51], v[52:53], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[3:6], v[0:1], off offset:64 +; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:128 +; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:192 +; GCN-NEXT: v_add_u32_e32 v0, 20, v19 +; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:640 ; 4-byte Folded 
Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[48:51], v[52:53], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[36:39], v[52:53], off offset:32 ; GCN-NEXT: global_load_dwordx4 v[52:55], v[52:53], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[12:15], v[56:57], off offset:16 -; GCN-NEXT: v_add_u32_e32 v0, 20, v3 -; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:648 ; 4-byte 
Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[12:15], v[56:57], off offset:32 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword 
v12, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[40:43], v[56:57], off offset:16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:628 ; 4-byte 
Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[44:47], v[56:57], off offset:32 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill ; GCN-NEXT: global_load_dwordx4 v[56:59], v[56:57], off offset:48 ; GCN-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 24, v3 +; GCN-NEXT: v_add_u32_e32 v0, 24, v19 ; GCN-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 28, 
v3 +; GCN-NEXT: v_add_u32_e32 v0, 28, v19 ; GCN-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 32, v3 +; GCN-NEXT: v_add_u32_e32 v0, 32, v19 ; GCN-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 36, v3 +; GCN-NEXT: v_add_u32_e32 v0, 36, v19 ; GCN-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 40, v3 +; GCN-NEXT: v_add_u32_e32 v0, 40, v19 ; GCN-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 44, v3 +; GCN-NEXT: v_add_u32_e32 v0, 44, v19 ; GCN-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 48, v3 +; GCN-NEXT: v_add_u32_e32 v0, 48, v19 ; GCN-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 52, v3 +; GCN-NEXT: v_add_u32_e32 v0, 52, v19 ; GCN-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 56, v3 +; GCN-NEXT: v_add_u32_e32 v0, 56, v19 ; GCN-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 60, v3 +; GCN-NEXT: v_add_u32_e32 v0, 60, v19 ; GCN-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 64, v3 -; GCN-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x44, v3 -; GCN-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x48, v3 -; GCN-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x4c, v3 -; GCN-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x50, v3 -; GCN-NEXT: buffer_store_dword v20, v60, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 64, v19 +; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x44, v19 ; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x54, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x48, v19 ; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 
v0, 0x58, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x4c, v19 ; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x5c, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x50, v19 ; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x60, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x54, v19 ; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x64, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x58, v19 ; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x68, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x5c, v19 ; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x6c, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x60, v19 ; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:628 ; 4-byte 
Folded Reload -; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v35, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0x70, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v12, v32 +; GCN-NEXT: v_add_u32_e32 v0, 0x64, v19 ; GCN-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v13, v33 -; GCN-NEXT: v_add_u32_e32 v0, 0x74, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x68, v19 ; GCN-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v14, v34 -; GCN-NEXT: v_add_u32_e32 v0, 0x78, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x6c, v19 ; GCN-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v15, v35 -; GCN-NEXT: v_add_u32_e32 v0, 0x7c, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x70, v19 ; GCN-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x80, v3 -; GCN-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x84, v3 -; GCN-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x88, v3 -; GCN-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x8c, v3 -; GCN-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x90, v3 -; GCN-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x94, v3 -; GCN-NEXT: buffer_store_dword v45, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x98, v3 -; GCN-NEXT: buffer_store_dword v46, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x9c, v3 -; GCN-NEXT: buffer_store_dword v47, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa0, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x74, v19 +; GCN-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x78, v19 +; GCN-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x7c, v19 +; GCN-NEXT: buffer_store_dword 
v18, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x80, v19 +; GCN-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x84, v19 +; GCN-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x88, v19 +; GCN-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x8c, v19 +; GCN-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x90, v19 +; GCN-NEXT: buffer_store_dword v20, v60, s[0:3], 0 offen ; GCN-NEXT: buffer_store_dword v48, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa4, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x94, v19 ; GCN-NEXT: buffer_store_dword v49, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa8, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x98, v19 ; GCN-NEXT: buffer_store_dword v50, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xac, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0x9c, v19 ; GCN-NEXT: buffer_store_dword v51, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb0, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xa0, v19 +; GCN-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa4, v19 +; GCN-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa8, v19 +; GCN-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xac, v19 +; GCN-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xb0, v19 ; GCN-NEXT: buffer_store_dword v52, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb4, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xb4, v19 ; GCN-NEXT: buffer_store_dword v53, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb8, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xb8, v19 ; GCN-NEXT: buffer_store_dword v54, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xbc, v3 +; GCN-NEXT: v_add_u32_e32 v0, 0xbc, v19 ; GCN-NEXT: buffer_store_dword v55, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc0, v3 -; GCN-NEXT: buffer_store_dword v40, 
v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc4, v3 -; GCN-NEXT: buffer_store_dword v41, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc8, v3 -; GCN-NEXT: buffer_store_dword v42, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xcc, v3 -; GCN-NEXT: buffer_store_dword v43, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 4, v3 -; GCN-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 8, v3 -; GCN-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 12, v3 -; GCN-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:256 -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, 
s[0:3], s33 offset:700 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0xc0, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc4, v19 +; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xc8, v19 +; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xcc, v19 +; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:704 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:708 
; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:712 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:716 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:720 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:724 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:728 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:732 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:736 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:740 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:744 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:748 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:752 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:756 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:760 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:764 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 4, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 8, v19 +; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 12, v19 +; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, 
s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload ; GCN-NEXT: v_lshrrev_b32_e32 v0, 1, v2 ; GCN-NEXT: v_and_b32_e32 v0, 63, v0 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GCN-NEXT: v_add_u32_e32 v0, v3, v0 +; GCN-NEXT: v_add_u32_e32 v0, v19, v0 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, v8 +; GCN-NEXT: v_mov_b32_e32 v4, v7 ; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v5, v9 -; GCN-NEXT: v_add_u32_e32 v1, 0xd4, v3 +; GCN-NEXT: v_mov_b32_e32 v5, v8 +; GCN-NEXT: v_add_u32_e32 v1, 0xd4, v19 +; GCN-NEXT: v_mov_b32_e32 v6, v9 ; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v6, v10 -; GCN-NEXT: v_add_u32_e32 v1, 0xd8, v3 +; GCN-NEXT: v_add_u32_e32 v1, 0xd8, v19 ; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v7, v11 -; GCN-NEXT: v_add_u32_e32 v1, 0xdc, v3 +; GCN-NEXT: v_mov_b32_e32 v7, v10 +; GCN-NEXT: v_add_u32_e32 v1, 0xdc, v19 ; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v4, 
off, s[0:3], s33 offset:512 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v1, 0xe0, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v8, v12 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: 
buffer_load_dword v9, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v1, 0xe0, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v11 ; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v9, v13 -; GCN-NEXT: v_add_u32_e32 v1, 0xe4, v3 +; GCN-NEXT: v_mov_b32_e32 v9, v12 +; GCN-NEXT: v_add_u32_e32 v1, 0xe4, v19 +; GCN-NEXT: v_mov_b32_e32 v10, v13 ; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v10, v14 -; GCN-NEXT: v_add_u32_e32 v1, 0xe8, v3 +; GCN-NEXT: v_add_u32_e32 v1, 0xe8, v19 ; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v11, v15 -; GCN-NEXT: v_add_u32_e32 v1, 0xec, v3 +; GCN-NEXT: v_mov_b32_e32 v11, v14 +; GCN-NEXT: v_add_u32_e32 v1, 0xec, v19 ; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xf0, v3 +; GCN-NEXT: v_add_u32_e32 v1, 0xf0, v19 ; GCN-NEXT: buffer_store_dword v56, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xf4, v3 +; GCN-NEXT: v_add_u32_e32 v1, 0xf4, v19 ; GCN-NEXT: buffer_store_dword v57, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xf8, v3 +; GCN-NEXT: v_add_u32_e32 v1, 0xf8, v19 ; 
GCN-NEXT: buffer_store_dword v58, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xfc, v3 +; GCN-NEXT: v_add_u32_e32 v1, 0xfc, v19 ; GCN-NEXT: buffer_store_dword v59, v1, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 ; 4-byte Folded Reload @@ -661,356 +723,345 @@ ; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0 ; GCN-NEXT: s_mov_b32 s6, s33 ; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; 
GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v1, vcc +; GCN-NEXT: s_movk_i32 s4, 0x80 ; GCN-NEXT: global_load_dwordx4 v[7:10], v[3:4], off offset:16 ; GCN-NEXT: global_load_dwordx4 v[11:14], v[3:4], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[56:59], v[3:4], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[15:18], v[0:1], off -; GCN-NEXT: s_movk_i32 s4, 0x80 +; GCN-NEXT: global_load_dwordx4 v[15:18], v[3:4], off offset:48 ; GCN-NEXT: s_mov_b32 s5, 0 ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_mov_b32_e32 v4, s5 -; GCN-NEXT: v_add_co_u32_e32 v3, vcc, v0, v3 +; GCN-NEXT: v_add_co_u32_e32 v52, vcc, v0, v3 ; GCN-NEXT: s_movk_i32 s4, 0xc0 -; GCN-NEXT: v_mov_b32_e32 v6, s5 -; GCN-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v4, vcc -; GCN-NEXT: v_mov_b32_e32 v5, s4 -; GCN-NEXT: v_add_co_u32_e32 v60, vcc, v0, v5 -; GCN-NEXT: v_addc_co_u32_e32 v61, vcc, v1, v6, vcc -; GCN-NEXT: v_lshrrev_b32_e64 v62, 6, s33 -; GCN-NEXT: v_add_u32_e32 v62, 0x100, v62 -; GCN-NEXT: v_add_u32_e32 v2, 16, v62 +; GCN-NEXT: v_addc_co_u32_e32 v53, vcc, v1, v4, vcc +; GCN-NEXT: 
v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_mov_b32_e32 v4, s5 +; GCN-NEXT: v_add_co_u32_e32 v56, vcc, v0, v3 +; GCN-NEXT: v_addc_co_u32_e32 v57, vcc, v1, v4, vcc +; GCN-NEXT: global_load_dwordx4 v[3:6], v[0:1], off +; GCN-NEXT: v_lshrrev_b32_e64 v19, 6, s33 +; GCN-NEXT: v_add_u32_e32 v19, 0x100, v19 +; GCN-NEXT: v_add_u32_e32 v60, 16, v19 ; GCN-NEXT: s_add_u32 s32, s32, 0x14000 ; GCN-NEXT: s_sub_u32 s32, s32, 0x14000 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:704 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:704 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: 
buffer_store_dword v4, off, s[0:3], s33 offset:708 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:712 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:716 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:720 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:724 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:728 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:732 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:736 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:740 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:744 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:748 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:752 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:756 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:760 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:764 ; 4-byte Folded Spill ; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 ; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:32 ; GCN-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:64 -; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:128 -; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:192 -; GCN-NEXT: global_load_dwordx4 v[44:47], v[3:4], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[48:51], v[3:4], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[52:55], v[3:4], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[15:18], v[60:61], off offset:16 -; GCN-NEXT: v_add_u32_e32 v0, 20, v62 -; GCN-NEXT: s_waitcnt 
vmcnt(0) -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:708 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:712 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:716 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:720 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:724 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:728 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:732 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:736 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:740 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:744 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:748 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:752 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:756 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:760 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:764 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:768 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[15:18], v[60:61], off offset:32 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill -; 
GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[15:18], v[60:61], off offset:48 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], 
s33 offset:556 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v20, v2, s[0:3], 0 offen +; GCN-NEXT: global_load_dwordx4 v[3:6], v[0:1], off offset:64 +; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:128 +; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:192 +; GCN-NEXT: v_add_u32_e32 v0, 20, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, 
s[0:3], s33 offset:692 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[48:51], v[52:53], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[36:39], v[52:53], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[52:55], v[52:53], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[40:43], v[56:57], off offset:16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[44:47], v[56:57], off 
offset:32 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[56:59], v[56:57], off offset:48 ; GCN-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 24, v62 +; GCN-NEXT: v_add_u32_e32 v0, 24, v19 ; GCN-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 28, v62 +; GCN-NEXT: v_add_u32_e32 v0, 28, v19 ; GCN-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 32, v62 +; GCN-NEXT: v_add_u32_e32 v0, 32, v19 ; GCN-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen -; 
GCN-NEXT: v_add_u32_e32 v0, 36, v62 +; GCN-NEXT: v_add_u32_e32 v0, 36, v19 ; GCN-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 40, v62 +; GCN-NEXT: v_add_u32_e32 v0, 40, v19 ; GCN-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 44, v62 +; GCN-NEXT: v_add_u32_e32 v0, 44, v19 ; GCN-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 48, v62 +; GCN-NEXT: v_add_u32_e32 v0, 48, v19 ; GCN-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 52, v62 +; GCN-NEXT: v_add_u32_e32 v0, 52, v19 ; GCN-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 56, v62 +; GCN-NEXT: v_add_u32_e32 v0, 56, v19 ; GCN-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 60, v62 +; GCN-NEXT: v_add_u32_e32 v0, 60, v19 ; GCN-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 64, v62 -; GCN-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x44, v62 -; GCN-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x48, v62 -; GCN-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x4c, v62 -; GCN-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x50, v62 +; GCN-NEXT: v_add_u32_e32 v0, 64, v19 +; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x44, v19 +; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x48, v19 +; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x4c, v19 +; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x50, v19 ; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x54, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0x54, v19 ; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen -; GCN-NEXT: 
v_add_u32_e32 v0, 0x58, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0x58, v19 ; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x5c, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0x5c, v19 ; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x60, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0x60, v19 ; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x64, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0x64, v19 ; GCN-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x68, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0x68, v19 ; GCN-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x6c, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0x6c, v19 ; GCN-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x70, v62 -; GCN-NEXT: buffer_store_dword v56, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x74, v62 -; GCN-NEXT: buffer_store_dword v57, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x78, v62 -; GCN-NEXT: buffer_store_dword v58, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x7c, v62 -; GCN-NEXT: buffer_store_dword v59, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x80, v62 -; GCN-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x84, v62 -; GCN-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x88, v62 -; GCN-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x8c, v62 -; GCN-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x90, v62 -; GCN-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x94, v62 -; GCN-NEXT: buffer_store_dword v45, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x98, v62 -; GCN-NEXT: buffer_store_dword v46, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x9c, v62 -; GCN-NEXT: buffer_store_dword v47, v0, s[0:3], 0 offen -; 
GCN-NEXT: v_add_u32_e32 v0, 0xa0, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0x70, v19 +; GCN-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x74, v19 +; GCN-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x78, v19 +; GCN-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x7c, v19 +; GCN-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x80, v19 +; GCN-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x84, v19 +; GCN-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x88, v19 +; GCN-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x8c, v19 +; GCN-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0x90, v19 +; GCN-NEXT: buffer_store_dword v20, v60, s[0:3], 0 offen ; GCN-NEXT: buffer_store_dword v48, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa4, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0x94, v19 ; GCN-NEXT: buffer_store_dword v49, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa8, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0x98, v19 ; GCN-NEXT: buffer_store_dword v50, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xac, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0x9c, v19 ; GCN-NEXT: buffer_store_dword v51, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb0, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0xa0, v19 +; GCN-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa4, v19 +; GCN-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xa8, v19 +; GCN-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xac, v19 +; GCN-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xb0, v19 ; GCN-NEXT: buffer_store_dword v52, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb4, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0xb4, 
v19 ; GCN-NEXT: buffer_store_dword v53, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb8, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0xb8, v19 ; GCN-NEXT: buffer_store_dword v54, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xbc, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0xbc, v19 ; GCN-NEXT: buffer_store_dword v55, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc0, v62 -; GCN-NEXT: buffer_store_dword v40, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc4, v62 -; GCN-NEXT: buffer_store_dword v41, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc8, v62 -; GCN-NEXT: buffer_store_dword v42, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xcc, v62 -; GCN-NEXT: buffer_store_dword v43, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 
offset:700 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:704 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 4, v62 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 8, v62 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0xc0, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 12, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0xc4, v19 ; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:256 -; 
GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:708 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:712 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:716 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:720 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:724 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:728 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:732 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:736 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:740 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:744 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:748 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:752 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:756 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:760 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:764 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:768 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0xd0, v62 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, v5 -; GCN-NEXT: v_mov_b32_e32 v5, v6 +; GCN-NEXT: v_add_u32_e32 v0, 0xc8, v19 +; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xcc, v19 +; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:704 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:708 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:712 ; 4-byte Folded Reload +; GCN-NEXT: 
buffer_load_dword v6, off, s[0:3], s33 offset:716 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:720 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:724 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:728 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:732 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:736 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:740 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:744 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:748 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:752 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:756 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:760 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:764 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 4, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 8, v19 +; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 12, v19 +; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:596 ; 4-byte 
Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0xd0, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v4, v7 ; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xd4, v62 -; GCN-NEXT: v_mov_b32_e32 v6, v7 +; GCN-NEXT: v_mov_b32_e32 v5, v8 +; GCN-NEXT: v_add_u32_e32 v0, 0xd4, v19 +; GCN-NEXT: v_mov_b32_e32 v6, v9 ; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xd8, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0xd8, v19 ; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v7, v8 -; GCN-NEXT: v_add_u32_e32 v0, 0xdc, v62 +; GCN-NEXT: v_mov_b32_e32 v7, v10 +; GCN-NEXT: v_add_u32_e32 v0, 0xdc, v19 ; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload -; GCN-NEXT: 
buffer_load_dword v5, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0xe0, v62 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v8, v9 -; GCN-NEXT: v_mov_b32_e32 v9, v10 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 
offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v0, 0xe0, v19 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v11 ; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xe4, v62 -; GCN-NEXT: v_mov_b32_e32 v10, v11 +; GCN-NEXT: v_mov_b32_e32 v9, v12 +; GCN-NEXT: v_add_u32_e32 v0, 0xe4, v19 +; GCN-NEXT: v_mov_b32_e32 v10, v13 ; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xe8, v62 +; GCN-NEXT: v_add_u32_e32 v0, 0xe8, v19 ; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v11, v12 -; GCN-NEXT: v_add_u32_e32 v0, 0xec, v62 +; GCN-NEXT: v_mov_b32_e32 v11, v14 +; GCN-NEXT: v_add_u32_e32 v0, 0xec, v19 ; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload 
-; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0xf0, v62 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v12, v13 -; GCN-NEXT: v_mov_b32_e32 v13, v14 -; GCN-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xf4, v62 -; GCN-NEXT: v_mov_b32_e32 v14, v15 -; GCN-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xf8, v62 -; GCN-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v15, v16 -; GCN-NEXT: v_add_u32_e32 v0, 0xfc, v62 -; GCN-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v0, 31, v0 +; GCN-NEXT: v_add_u32_e32 v0, 0xf0, v19 +; GCN-NEXT: buffer_store_dword v56, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xf4, v19 +; GCN-NEXT: buffer_store_dword v57, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xf8, v19 +; GCN-NEXT: buffer_store_dword v58, v0, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v0, 0xfc, v19 +; GCN-NEXT: buffer_store_dword v59, v0, s[0:3], 0 offen +; GCN-NEXT: v_and_b32_e32 v0, 31, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GCN-NEXT: v_add_u32_e32 v0, v62, v0 +; GCN-NEXT: v_add_u32_e32 v0, v19, v0 ; GCN-NEXT: v_add_u32_e32 v1, 4, 
v0 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GCN-NEXT: 
buffer_load_dword v45, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b32 s33, s6 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -2106,11 +2106,11 @@ ; GFX6-NEXT: s_brev_b32 s4, 1 ; GFX6-NEXT: v_min_i32_e32 v32, 0, v0 ; GFX6-NEXT: v_sub_i32_e32 v32, vcc, s4, v32 -; GFX6-NEXT: v_max_i32_e32 v16, v32, v16 +; GFX6-NEXT: v_max_i32_e32 v32, v32, v16 ; GFX6-NEXT: s_brev_b32 s5, -2 -; GFX6-NEXT: v_max_i32_e32 v32, 0, v0 -; GFX6-NEXT: v_sub_i32_e32 v32, vcc, s5, v32 -; GFX6-NEXT: v_min_i32_e32 v16, v16, v32 +; GFX6-NEXT: v_max_i32_e32 v16, 0, v0 +; GFX6-NEXT: v_sub_i32_e32 v16, vcc, s5, v16 +; GFX6-NEXT: v_min_i32_e32 v16, v32, v16 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v16 ; GFX6-NEXT: v_min_i32_e32 v16, 0, v1 ; GFX6-NEXT: v_sub_i32_e32 v16, vcc, s4, v16 @@ -2227,11 +2227,11 @@ ; GFX8-NEXT: s_brev_b32 s4, 1 ; GFX8-NEXT: v_min_i32_e32 v32, 0, v0 ; GFX8-NEXT: v_sub_u32_e32 v32, vcc, s4, v32 -; GFX8-NEXT: v_max_i32_e32 v16, v32, v16 +; GFX8-NEXT: v_max_i32_e32 v32, v32, v16 ; GFX8-NEXT: s_brev_b32 s5, -2 -; GFX8-NEXT: v_max_i32_e32 v32, 0, v0 -; GFX8-NEXT: v_sub_u32_e32 v32, vcc, s5, v32 -; GFX8-NEXT: v_min_i32_e32 v16, v16, v32 +; GFX8-NEXT: v_max_i32_e32 v16, 0, v0 +; GFX8-NEXT: v_sub_u32_e32 v16, vcc, s5, v16 +; GFX8-NEXT: v_min_i32_e32 v16, v32, v16 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v16 ; 
GFX8-NEXT: v_min_i32_e32 v16, 0, v1 ; GFX8-NEXT: v_sub_u32_e32 v16, vcc, s4, v16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -2092,11 +2092,11 @@ ; GFX6-NEXT: s_brev_b32 s4, -2 ; GFX6-NEXT: v_max_i32_e32 v32, -1, v0 ; GFX6-NEXT: v_subrev_i32_e32 v32, vcc, s4, v32 -; GFX6-NEXT: v_max_i32_e32 v16, v32, v16 +; GFX6-NEXT: v_max_i32_e32 v32, v32, v16 ; GFX6-NEXT: s_brev_b32 s5, 1 -; GFX6-NEXT: v_min_i32_e32 v32, -1, v0 -; GFX6-NEXT: v_subrev_i32_e32 v32, vcc, s5, v32 -; GFX6-NEXT: v_min_i32_e32 v16, v16, v32 +; GFX6-NEXT: v_min_i32_e32 v16, -1, v0 +; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, s5, v16 +; GFX6-NEXT: v_min_i32_e32 v16, v32, v16 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v16 ; GFX6-NEXT: v_max_i32_e32 v16, -1, v1 ; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, s4, v16 @@ -2213,11 +2213,11 @@ ; GFX8-NEXT: s_brev_b32 s4, -2 ; GFX8-NEXT: v_max_i32_e32 v32, -1, v0 ; GFX8-NEXT: v_subrev_u32_e32 v32, vcc, s4, v32 -; GFX8-NEXT: v_max_i32_e32 v16, v32, v16 +; GFX8-NEXT: v_max_i32_e32 v32, v32, v16 ; GFX8-NEXT: s_brev_b32 s5, 1 -; GFX8-NEXT: v_min_i32_e32 v32, -1, v0 -; GFX8-NEXT: v_subrev_u32_e32 v32, vcc, s5, v32 -; GFX8-NEXT: v_min_i32_e32 v16, v16, v32 +; GFX8-NEXT: v_min_i32_e32 v16, -1, v0 +; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, s5, v16 +; GFX8-NEXT: v_min_i32_e32 v16, v32, v16 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v16 ; GFX8-NEXT: v_max_i32_e32 v16, -1, v1 ; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, s4, v16 diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir @@ -58,20 +58,21 @@ ; CHECK: SI_SPILL_V128_SAVE %117, %stack.10, $sgpr32, 0, implicit $exec :: (store 16 into %stack.10, align 4, addrspace 5) ; CHECK: 
[[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 64, addrspace 1) ; CHECK: undef %122.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec - ; CHECK: undef %126.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec - ; CHECK: undef %130.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %130, %stack.11, $sgpr32, 0, implicit $exec :: (store 16 into %stack.11, align 4, addrspace 5) - ; CHECK: undef %135.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %135, %stack.12, $sgpr32, 0, implicit $exec :: (store 16 into %stack.12, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %122, %stack.11, $sgpr32, 0, implicit $exec :: (store 16 into %stack.11, align 4, addrspace 5) + ; CHECK: undef %127.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %127, %stack.12, $sgpr32, 0, implicit $exec :: (store 16 into %stack.12, align 4, addrspace 5) + ; CHECK: undef %132.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %132, %stack.13, $sgpr32, 0, implicit $exec :: (store 16 into %stack.13, align 4, addrspace 5) + ; CHECK: undef %137.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %137, %stack.14, $sgpr32, 0, implicit $exec :: (store 16 into %stack.14, align 4, addrspace 5) ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) - ; CHECK: undef %140.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec - ; CHECK: undef %144.sub2:vreg_128 = 
V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %144, %stack.13, $sgpr32, 0, implicit $exec :: (store 16 into %stack.13, align 4, addrspace 5) - ; CHECK: undef %149.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %149, %stack.14, $sgpr32, 0, implicit $exec :: (store 16 into %stack.14, align 4, addrspace 5) - ; CHECK: undef %154.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec + ; CHECK: undef %142.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec + ; CHECK: undef %146.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec + ; CHECK: undef %150.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE %150, %stack.15, $sgpr32, 0, implicit $exec :: (store 16 into %stack.15, align 4, addrspace 5) + ; CHECK: undef %155.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1) - ; CHECK: undef %158.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec + ; CHECK: undef %159.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec ; CHECK: undef %36.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec ; CHECK: undef %37.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec ; CHECK: undef %38.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec @@ -124,28 +125,29 @@ ; CHECK: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load 16 
from %stack.10, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE10]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE10]], %stack.10, $sgpr32, 0, implicit $exec :: (store 16 into %stack.10, align 4, addrspace 5) - ; CHECK: undef %123.sub2:vreg_128 = COPY %122.sub2 - ; CHECK: %123.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec - ; CHECK: undef %127.sub2:vreg_128 = COPY %126.sub2 - ; CHECK: %127.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec ; CHECK: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load 16 from %stack.11, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE11]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec + ; CHECK: [[SI_SPILL_V128_RESTORE11]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE11]], %stack.11, $sgpr32, 0, implicit $exec :: (store 16 into %stack.11, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE12:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load 16 from %stack.12, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE12]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec + ; CHECK: [[SI_SPILL_V128_RESTORE12]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE12]], %stack.12, $sgpr32, 0, implicit $exec :: (store 16 into %stack.12, align 4, addrspace 5) - ; CHECK: undef %141.sub2:vreg_128 = COPY %140.sub2 - ; CHECK: %141.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec 
; CHECK: [[SI_SPILL_V128_RESTORE13:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr32, 0, implicit $exec :: (load 16 from %stack.13, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE13]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec + ; CHECK: [[SI_SPILL_V128_RESTORE13]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE13]], %stack.13, $sgpr32, 0, implicit $exec :: (store 16 into %stack.13, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE14:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr32, 0, implicit $exec :: (load 16 from %stack.14, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE14]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec + ; CHECK: [[SI_SPILL_V128_RESTORE14]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE14]], %stack.14, $sgpr32, 0, implicit $exec :: (store 16 into %stack.14, align 4, addrspace 5) - ; CHECK: undef %155.sub2:vreg_128 = COPY %154.sub2 - ; CHECK: %155.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec - ; CHECK: undef %159.sub2:vreg_128 = COPY %158.sub2 - ; CHECK: %159.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec + ; CHECK: undef %143.sub2:vreg_128 = COPY %142.sub2 + ; CHECK: %143.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec + ; CHECK: undef %147.sub2:vreg_128 = COPY %146.sub2 + ; CHECK: %147.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec + ; CHECK: [[SI_SPILL_V128_RESTORE15:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.15, $sgpr32, 0, implicit $exec :: (load 16 from %stack.15, align 
4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE15]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE15]], %stack.15, $sgpr32, 0, implicit $exec :: (store 16 into %stack.15, align 4, addrspace 5) + ; CHECK: undef %156.sub2:vreg_128 = COPY %155.sub2 + ; CHECK: %156.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec + ; CHECK: undef %160.sub2:vreg_128 = COPY %159.sub2 + ; CHECK: %160.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec ; CHECK: %36.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec ; CHECK: %37.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec ; CHECK: %38.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec @@ -174,67 +176,68 @@ ; CHECK: %36.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %36.sub3:vreg_128 = COPY %43.sub1 ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1) - ; CHECK: undef %157.sub0:vreg_128 = COPY %159.sub0 { - ; CHECK: internal %157.sub2:vreg_128 = COPY %159.sub2 + ; CHECK: undef %158.sub0:vreg_128 = COPY %160.sub0 { + ; CHECK: internal %158.sub2:vreg_128 = COPY %160.sub2 ; CHECK: } - ; CHECK: %157.sub1:vreg_128 = COPY %43.sub1 - ; CHECK: %157.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %157, %2, 0, 400, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: undef %153.sub0:vreg_128 = COPY %155.sub0 { - ; CHECK: internal %153.sub2:vreg_128 = COPY %155.sub2 + ; CHECK: %158.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %158.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %158, %2, 0, 400, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: undef %154.sub0:vreg_128 = 
COPY %156.sub0 { + ; CHECK: internal %154.sub2:vreg_128 = COPY %156.sub2 ; CHECK: } - ; CHECK: %153.sub1:vreg_128 = COPY %43.sub1 - ; CHECK: %153.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %153, %2, 0, 352, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE15:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr32, 0, implicit $exec :: (load 16 from %stack.14, align 4, addrspace 5) - ; CHECK: undef %148.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE15]].sub0 { - ; CHECK: internal %148.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE15]].sub2 + ; CHECK: %154.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %154.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %154, %2, 0, 352, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE16:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.15, $sgpr32, 0, implicit $exec :: (load 16 from %stack.15, align 4, addrspace 5) + ; CHECK: undef %149.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub0 { + ; CHECK: internal %149.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub2 ; CHECK: } - ; CHECK: %148.sub1:vreg_128 = COPY %43.sub1 - ; CHECK: %148.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %148, %2, 0, 368, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE16:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr32, 0, implicit $exec :: (load 16 from %stack.13, align 4, addrspace 5) - ; CHECK: undef %143.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub0 { - ; CHECK: internal %143.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub2 + ; CHECK: %149.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %149.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %149, %2, 0, 368, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: undef %145.sub0:vreg_128 = COPY %147.sub0 { + ; CHECK: internal 
%145.sub2:vreg_128 = COPY %147.sub2 ; CHECK: } - ; CHECK: %143.sub1:vreg_128 = COPY %43.sub1 - ; CHECK: %143.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %143, %2, 0, 320, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) - ; CHECK: undef %139.sub0:vreg_128 = COPY %141.sub0 { - ; CHECK: internal %139.sub2:vreg_128 = COPY %141.sub2 + ; CHECK: %145.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %145.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %145, %2, 0, 320, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + ; CHECK: undef %141.sub0:vreg_128 = COPY %143.sub0 { + ; CHECK: internal %141.sub2:vreg_128 = COPY %143.sub2 ; CHECK: } - ; CHECK: %139.sub1:vreg_128 = COPY %43.sub1 - ; CHECK: %139.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %139, %2, 0, 336, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE17:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load 16 from %stack.12, align 4, addrspace 5) - ; CHECK: undef %134.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub0 { - ; CHECK: internal %134.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub2 + ; CHECK: %141.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %141.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %141, %2, 0, 336, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE17:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr32, 0, implicit $exec :: (load 16 from %stack.14, align 4, addrspace 5) + ; CHECK: undef %136.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub0 { + ; CHECK: internal %136.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub2 ; CHECK: } - ; CHECK: %134.sub1:vreg_128 = COPY %43.sub1 - ; CHECK: %134.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %134, %2, 0, 288, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 
1) - ; CHECK: [[SI_SPILL_V128_RESTORE18:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load 16 from %stack.11, align 4, addrspace 5) - ; CHECK: undef %129.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub0 { - ; CHECK: internal %129.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub2 + ; CHECK: %136.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %136.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %136, %2, 0, 288, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE18:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr32, 0, implicit $exec :: (load 16 from %stack.13, align 4, addrspace 5) + ; CHECK: undef %131.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub0 { + ; CHECK: internal %131.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub2 ; CHECK: } - ; CHECK: %129.sub1:vreg_128 = COPY %43.sub1 - ; CHECK: %129.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %129, %2, 0, 304, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: undef %125.sub0:vreg_128 = COPY %127.sub0 { - ; CHECK: internal %125.sub2:vreg_128 = COPY %127.sub2 + ; CHECK: %131.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %131.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %131, %2, 0, 304, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE19:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load 16 from %stack.12, align 4, addrspace 5) + ; CHECK: undef %126.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub0 { + ; CHECK: internal %126.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub2 ; CHECK: } - ; CHECK: %125.sub1:vreg_128 = COPY %43.sub1 - ; CHECK: %125.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %125, %2, 0, 256, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 256, addrspace 1) - ; CHECK: undef %121.sub0:vreg_128 = COPY 
%123.sub0 { - ; CHECK: internal %121.sub2:vreg_128 = COPY %123.sub2 + ; CHECK: %126.sub1:vreg_128 = COPY %43.sub1 + ; CHECK: %126.sub3:vreg_128 = COPY %43.sub1 + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %126, %2, 0, 256, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 256, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE20:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load 16 from %stack.11, align 4, addrspace 5) + ; CHECK: undef %121.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE20]].sub0 { + ; CHECK: internal %121.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE20]].sub2 ; CHECK: } ; CHECK: %121.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %121.sub3:vreg_128 = COPY %43.sub1 ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %121, %2, 0, 272, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE19:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load 16 from %stack.10, align 4, addrspace 5) - ; CHECK: undef %116.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub0 { - ; CHECK: internal %116.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub2 + ; CHECK: [[SI_SPILL_V128_RESTORE21:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load 16 from %stack.10, align 4, addrspace 5) + ; CHECK: undef %116.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub0 { + ; CHECK: internal %116.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub2 ; CHECK: } ; CHECK: %116.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %116.sub3:vreg_128 = COPY %43.sub1 @@ -257,23 +260,23 @@ ; CHECK: %104.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %104.sub3:vreg_128 = COPY %43.sub1 ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %104, %2, 0, 208, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE20:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load 16 from %stack.9, align 4, addrspace 5) - ; CHECK: undef %99.sub0:vreg_128 = COPY 
[[SI_SPILL_V128_RESTORE20]].sub0 { - ; CHECK: internal %99.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE20]].sub2 + ; CHECK: [[SI_SPILL_V128_RESTORE22:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load 16 from %stack.9, align 4, addrspace 5) + ; CHECK: undef %99.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub0 { + ; CHECK: internal %99.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub2 ; CHECK: } ; CHECK: %99.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %99.sub3:vreg_128 = COPY %43.sub1 ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %99, %2, 0, 160, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE21:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load 16 from %stack.8, align 4, addrspace 5) - ; CHECK: undef %94.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub0 { - ; CHECK: internal %94.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub2 + ; CHECK: [[SI_SPILL_V128_RESTORE23:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load 16 from %stack.8, align 4, addrspace 5) + ; CHECK: undef %94.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub0 { + ; CHECK: internal %94.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub2 ; CHECK: } ; CHECK: %94.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %94.sub3:vreg_128 = COPY %43.sub1 ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %94, %2, 0, 176, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE22:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load 16 from %stack.7, align 4, addrspace 5) - ; CHECK: undef %89.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub0 { - ; CHECK: internal %89.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub2 + ; CHECK: [[SI_SPILL_V128_RESTORE24:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load 16 from %stack.7, align 4, addrspace 5) + ; CHECK: 
undef %89.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub0 { + ; CHECK: internal %89.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub2 ; CHECK: } ; CHECK: %89.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %89.sub3:vreg_128 = COPY %43.sub1 @@ -284,23 +287,23 @@ ; CHECK: %85.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %85.sub3:vreg_128 = COPY %43.sub1 ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %85, %2, 0, 144, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE23:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load 16 from %stack.6, align 4, addrspace 5) - ; CHECK: undef %80.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub0 { - ; CHECK: internal %80.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub2 + ; CHECK: [[SI_SPILL_V128_RESTORE25:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load 16 from %stack.6, align 4, addrspace 5) + ; CHECK: undef %80.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub0 { + ; CHECK: internal %80.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub2 ; CHECK: } ; CHECK: %80.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %80.sub3:vreg_128 = COPY %43.sub1 ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %80, %2, 0, 96, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE24:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load 16 from %stack.5, align 4, addrspace 5) - ; CHECK: undef %75.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub0 { - ; CHECK: internal %75.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub2 + ; CHECK: [[SI_SPILL_V128_RESTORE26:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load 16 from %stack.5, align 4, addrspace 5) + ; CHECK: undef %75.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub0 { + ; CHECK: internal %75.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub2 ; CHECK: } ; CHECK: %75.sub1:vreg_128 
= COPY %43.sub1 ; CHECK: %75.sub3:vreg_128 = COPY %43.sub1 ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %75, %2, 0, 112, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE25:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load 16 from %stack.4, align 4, addrspace 5) - ; CHECK: undef %70.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub0 { - ; CHECK: internal %70.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub2 + ; CHECK: [[SI_SPILL_V128_RESTORE27:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load 16 from %stack.4, align 4, addrspace 5) + ; CHECK: undef %70.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub0 { + ; CHECK: internal %70.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub2 ; CHECK: } ; CHECK: %70.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %70.sub3:vreg_128 = COPY %43.sub1 @@ -311,30 +314,30 @@ ; CHECK: %66.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %66.sub3:vreg_128 = COPY %43.sub1 ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %66, %2, 0, 80, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE26:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load 16 from %stack.3, align 4, addrspace 5) - ; CHECK: undef %61.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub0 { - ; CHECK: internal %61.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub2 + ; CHECK: [[SI_SPILL_V128_RESTORE28:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load 16 from %stack.3, align 4, addrspace 5) + ; CHECK: undef %61.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub0 { + ; CHECK: internal %61.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub2 ; CHECK: } ; CHECK: %61.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %61.sub3:vreg_128 = COPY %43.sub1 ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %61, %2, 0, 32, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) - ; CHECK: 
[[SI_SPILL_V128_RESTORE27:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load 16 from %stack.2, align 4, addrspace 5) - ; CHECK: undef %56.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub0 { - ; CHECK: internal %56.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub2 + ; CHECK: [[SI_SPILL_V128_RESTORE29:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load 16 from %stack.2, align 4, addrspace 5) + ; CHECK: undef %56.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub0 { + ; CHECK: internal %56.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub2 ; CHECK: } ; CHECK: %56.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %56.sub3:vreg_128 = COPY %43.sub1 ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %56, %2, 0, 48, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE28:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load 16 from %stack.1, align 4, addrspace 5) - ; CHECK: undef %51.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub0 { - ; CHECK: internal %51.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub2 + ; CHECK: [[SI_SPILL_V128_RESTORE30:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load 16 from %stack.1, align 4, addrspace 5) + ; CHECK: undef %51.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE30]].sub0 { + ; CHECK: internal %51.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE30]].sub2 ; CHECK: } ; CHECK: %51.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %51.sub3:vreg_128 = COPY %43.sub1 ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %51, %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 512, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE29:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) - ; CHECK: undef %46.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub0 { - ; CHECK: internal %46.sub2:vreg_128 = COPY 
[[SI_SPILL_V128_RESTORE29]].sub2 + ; CHECK: [[SI_SPILL_V128_RESTORE31:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) + ; CHECK: undef %46.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE31]].sub0 { + ; CHECK: internal %46.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE31]].sub2 ; CHECK: } ; CHECK: %46.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %46.sub3:vreg_128 = COPY %43.sub1 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -133,17 +133,17 @@ ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 -; GFX10-NEXT: v_mov_b32_e32 v40, v16 +; GFX10-NEXT: v_mov_b32_e32 v44, v16 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v41, v15 ; GFX10-NEXT: v_mov_b32_e32 v42, v14 ; GFX10-NEXT: v_mov_b32_e32 v43, v13 -; GFX10-NEXT: v_mov_b32_e32 v44, v12 +; GFX10-NEXT: v_mov_b32_e32 v40, v12 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v44, v43, v42, v41, v40], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v40, v43, v42, v41, v44], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; GFX10: buffer_load_dword v44, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4