Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -273,6 +273,9 @@
   LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI,
                                                 unsigned TypeIdx,
                                                 LLT NarrowTy);
+  LegalizeResult fewerElementsVectorExtractVectorElt(MachineInstr &MI,
+                                                     unsigned TypeIdx,
+                                                     LLT NarrowTy);
 
   LegalizeResult
   reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3293,6 +3293,65 @@
   return Legalized;
 }
 
+/// Narrow the source vector (type index 1) of a G_EXTRACT_VECTOR_ELT to
+/// NarrowVecTy.
+///
+/// For a constant index, the source is broken into NarrowVecTy pieces and the
+/// element is extracted from the piece that holds it; a constant index outside
+/// the source vector yields an undef result. For a variable index, this falls
+/// back to lowerExtractVectorElt. Returns UnableToLegalize if NarrowVecTy is
+/// not a vector.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorExtractVectorElt(MachineInstr &MI,
+                                                     unsigned TypeIdx,
+                                                     LLT NarrowVecTy) {
+  assert(TypeIdx == 1 && "not a vector type index");
+
+  // TODO: Handle total scalarization case.
+  if (!NarrowVecTy.isVector())
+    return UnableToLegalize;
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcVec = MI.getOperand(1).getReg();
+  Register Idx = MI.getOperand(2).getReg();
+  LLT VecTy = MRI.getType(SrcVec);
+
+  // If the index is a constant, we can really break this down as you would
+  // expect, and index into the target size pieces.
+  int64_t IdxVal;
+  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
+    // Avoid out of bounds indexing the pieces. IdxVal is signed, so a negative
+    // constant must be rejected as well as one past the end of the vector.
+    if (IdxVal < 0 || IdxVal >= VecTy.getNumElements()) {
+      MIRBuilder.buildUndef(DstReg);
+      MI.eraseFromParent();
+      return Legalized;
+    }
+
+    SmallVector<Register, 8> VecParts;
+    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
+
+    // Build a sequence of NarrowTy pieces in VecParts for this operand.
+    buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
+                        TargetOpcode::G_ANYEXT);
+
+    unsigned NewNumElts = NarrowVecTy.getNumElements();
+
+    LLT IdxTy = MRI.getType(Idx);
+    int64_t PartIdx = IdxVal / NewNumElts;
+    auto NewIdx =
+        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
+
+    MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  // With a variable index, we can't perform the extract in a smaller type, so
+  // we're forced to expand this.
+  return lowerExtractVectorElt(MI);
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                       LLT NarrowTy) {
@@ -3620,6 +3679,8 @@
     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
   case G_BUILD_VECTOR:
     return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
+  case G_EXTRACT_VECTOR_ELT:
+    return fewerElementsVectorExtractVectorElt(MI, TypeIdx, NarrowTy);
   case G_LOAD:
   case G_STORE:
     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1284,7 +1284,8 @@
       .clampScalar(EltTypeIdx, S32, S64)
       .clampScalar(VecTypeIdx, S32, S64)
       .clampScalar(IdxTypeIdx, S32, S32)
-      // TODO: Clamp the number of elements before resorting to stack lowering.
+      .clampMaxNumElements(1, S32, 32)
+      // TODO: Clamp elements for 64-bit vectors?
       // It should only be necessary with variable indexes.
// As a last resort, lower to the stack .lower(); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -0,0 +1,1318 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s + +; Check lowering of some large extractelement that use the stack +; instead of register indexing. + +define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { +; GCN-LABEL: v_extract_v64i32_varidx: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v17, v0 +; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0 +; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: s_mov_b32 s6, s33 +; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000 +; GCN-NEXT: s_movk_i32 s4, 0x80 +; GCN-NEXT: v_mov_b32_e32 v16, s5 +; GCN-NEXT: v_mov_b32_e32 v18, v1 +; GCN-NEXT: v_add_co_u32_e32 v31, vcc, 64, v17 +; GCN-NEXT: v_mov_b32_e32 v15, s4 +; GCN-NEXT: v_addc_co_u32_e32 v32, vcc, 0, v18, vcc +; GCN-NEXT: v_add_co_u32_e32 v48, vcc, v17, v15 +; GCN-NEXT: v_addc_co_u32_e32 v49, vcc, v18, v16, vcc +; GCN-NEXT: s_movk_i32 s4, 0xc0 +; GCN-NEXT: v_mov_b32_e32 v16, s5 +; GCN-NEXT: v_mov_b32_e32 v15, s4 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 
4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_add_co_u32_e32 v59, vcc, v17, v15 +; GCN-NEXT: global_load_dwordx4 v[3:6], v[17:18], off +; GCN-NEXT: global_load_dwordx4 v[7:10], v[17:18], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[11:14], v[17:18], off offset:32 +; GCN-NEXT: v_addc_co_u32_e32 v60, vcc, v18, v16, vcc +; GCN-NEXT: global_load_dwordx4 v[15:18], v[17:18], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[19:22], v[31:32], off +; GCN-NEXT: global_load_dwordx4 v[23:26], v[31:32], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[27:30], v[31:32], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[31:34], v[31:32], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[35:38], v[48:49], off +; GCN-NEXT: global_load_dwordx4 v[39:42], v[48:49], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[43:46], v[48:49], off offset:32 +; GCN-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GCN-NEXT: v_add_u32_e32 v0, 0x100, v0 +; GCN-NEXT: v_add_u32_e32 v1, 16, v0 +; GCN-NEXT: s_add_u32 s32, s32, 0x10000 +; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill 
+; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[47:50], v[48:49], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: 
buffer_store_dword v51, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[51:54], v[59:60], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[55:58], v[59:60], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[59:62], v[59:60], off offset:48 +; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 20, v0 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 24, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 28, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 32, v0 +; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 36, v0 +; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 40, v0 +; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 44, v0 +; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 48, v0 +; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 52, v0 +; GCN-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 56, v0 +; GCN-NEXT: buffer_store_dword v17, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 60, v0 +; GCN-NEXT: buffer_store_dword v18, v1, s[0:3], 0 offen +; GCN-NEXT: 
v_add_u32_e32 v1, 64, v0 +; GCN-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x44, v0 +; GCN-NEXT: buffer_store_dword v20, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x48, v0 +; GCN-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x4c, v0 +; GCN-NEXT: buffer_store_dword v22, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x50, v0 +; GCN-NEXT: buffer_store_dword v23, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x54, v0 +; GCN-NEXT: buffer_store_dword v24, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x58, v0 +; GCN-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x5c, v0 +; GCN-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x60, v0 +; GCN-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x64, v0 +; GCN-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x68, v0 +; GCN-NEXT: buffer_store_dword v29, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x6c, v0 +; GCN-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x70, v0 +; GCN-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x74, v0 +; GCN-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x78, v0 +; GCN-NEXT: buffer_store_dword v33, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x7c, v0 +; GCN-NEXT: buffer_store_dword v34, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x80, v0 +; GCN-NEXT: buffer_store_dword v35, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x84, v0 +; GCN-NEXT: buffer_store_dword v36, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x88, v0 +; GCN-NEXT: buffer_store_dword v37, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x8c, v0 +; GCN-NEXT: buffer_store_dword v38, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x90, v0 +; GCN-NEXT: buffer_store_dword 
v39, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x94, v0 +; GCN-NEXT: buffer_store_dword v40, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x98, v0 +; GCN-NEXT: buffer_store_dword v41, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x9c, v0 +; GCN-NEXT: buffer_store_dword v42, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v1, 0xa0, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v15 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v9, v16 +; GCN-NEXT: v_add_u32_e32 v1, 0xa4, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen 
+; GCN-NEXT: v_mov_b32_e32 v10, v17 +; GCN-NEXT: v_add_u32_e32 v1, 0xa8, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v11, v18 +; GCN-NEXT: v_add_u32_e32 v1, 0xac, v0 +; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0 +; GCN-NEXT: buffer_store_dword v47, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb4, v0 +; GCN-NEXT: buffer_store_dword v48, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb8, v0 +; GCN-NEXT: buffer_store_dword v49, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xbc, v0 +; GCN-NEXT: buffer_store_dword v50, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, 
s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v1, 0xc0, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xc4, v0 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xc8, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xcc, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 4, v0 +; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 8, v0 +; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 12, v0 +; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256 +; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v0 +; GCN-NEXT: buffer_store_dword v51, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xd4, v0 +; GCN-NEXT: buffer_store_dword v52, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xd8, v0 +; GCN-NEXT: buffer_store_dword v53, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xdc, v0 +; GCN-NEXT: buffer_store_dword v54, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xe0, v0 +; GCN-NEXT: buffer_store_dword v55, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xe4, v0 +; GCN-NEXT: buffer_store_dword v56, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xe8, v0 +; GCN-NEXT: buffer_store_dword v57, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xec, v0 +; GCN-NEXT: buffer_store_dword v58, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xf0, v0 +; GCN-NEXT: buffer_store_dword v59, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xf4, v0 +; GCN-NEXT: buffer_store_dword v60, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xf8, v0 +; GCN-NEXT: buffer_store_dword v61, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xfc, v0 +; GCN-NEXT: buffer_store_dword v62, v1, s[0:3], 0 offen +; GCN-NEXT: v_and_b32_e32 
v1, 63, v2 +; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GCN-NEXT: v_add_u32_e32 v0, v0, v1 +; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b32 s33, s6 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %elt = extractelement <64 x i32> %vec, i32 %idx + ret i32 %elt +} + +define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { +; GCN-LABEL: v_extract_v128i16_varidx: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_add_u32 s4, s32, 0x7fc0 +; GCN-NEXT: s_mov_b32 s6, s33 +; GCN-NEXT: s_and_b32 s33, s4, 0xffff8000 +; GCN-NEXT: buffer_store_dword v40, off, 
s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:1024 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[3:6], v[0:1], off +; GCN-NEXT: global_load_dwordx4 v[7:10], v[0:1], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[11:14], v[0:1], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[15:18], v[0:1], off offset:48 +; GCN-NEXT: s_movk_i32 s4, 0x80 +; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: s_add_u32 s32, s32, 0x20000 +; GCN-NEXT: s_sub_u32 s32, s32, 0x20000 +; GCN-NEXT: s_waitcnt vmcnt(3) +; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:1032 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_lshrrev_b32_e32 v36, 16, v12 +; GCN-NEXT: v_bfe_i32 v48, v12, 0, 16 +; 
GCN-NEXT: v_add_co_u32_e32 v12, vcc, 64, v0 +; GCN-NEXT: v_lshrrev_b32_e32 v37, 16, v13 +; GCN-NEXT: v_bfe_i32 v49, v13, 0, 16 +; GCN-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc +; GCN-NEXT: v_bfe_i32 v2, v3, 0, 16 +; GCN-NEXT: v_lshrrev_b32_e32 v20, 16, v4 +; GCN-NEXT: v_lshrrev_b32_e32 v21, 16, v5 +; GCN-NEXT: v_lshrrev_b32_e32 v22, 16, v6 +; GCN-NEXT: v_bfe_i32 v24, v4, 0, 16 +; GCN-NEXT: v_bfe_i32 v25, v5, 0, 16 +; GCN-NEXT: v_bfe_i32 v26, v6, 0, 16 +; GCN-NEXT: global_load_dwordx4 v[3:6], v[12:13], off +; GCN-NEXT: v_lshrrev_b32_e32 v28, 16, v8 +; GCN-NEXT: v_lshrrev_b32_e32 v29, 16, v9 +; GCN-NEXT: v_lshrrev_b32_e32 v30, 16, v10 +; GCN-NEXT: v_bfe_i32 v32, v8, 0, 16 +; GCN-NEXT: v_bfe_i32 v33, v9, 0, 16 +; GCN-NEXT: v_bfe_i32 v34, v10, 0, 16 +; GCN-NEXT: v_lshrrev_b32_e32 v35, 16, v11 +; GCN-NEXT: v_bfe_i32 v39, v11, 0, 16 +; GCN-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:32 +; GCN-NEXT: v_lshrrev_b32_e32 v27, 16, v7 +; GCN-NEXT: v_bfe_i32 v31, v7, 0, 16 +; GCN-NEXT: v_lshrrev_b32_e32 v38, 16, v14 +; GCN-NEXT: v_bfe_i32 v50, v14, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(3) +; GCN-NEXT: v_lshrrev_b32_e32 v51, 16, v15 +; GCN-NEXT: v_bfe_i32 v55, v15, 0, 16 +; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:1028 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v52, 16, v16 +; GCN-NEXT: v_bfe_i32 v16, v16, 0, 16 +; GCN-NEXT: v_lshrrev_b32_e32 v53, 16, v17 +; GCN-NEXT: v_bfe_i32 v17, v17, 0, 16 +; GCN-NEXT: v_lshrrev_b32_e32 v54, 16, v18 +; GCN-NEXT: v_bfe_i32 v18, v18, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_lshrrev_b32_e32 v41, 16, v4 +; GCN-NEXT: v_lshrrev_b32_e32 v42, 16, v5 +; GCN-NEXT: v_lshrrev_b32_e32 v43, 16, v6 +; GCN-NEXT: v_bfe_i32 v45, v4, 0, 16 +; GCN-NEXT: v_bfe_i32 v46, v5, 0, 16 +; GCN-NEXT: v_bfe_i32 v47, v6, 0, 16 +; GCN-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:48 +; GCN-NEXT: v_lshrrev_b32_e32 v40, 16, v3 +; GCN-NEXT: v_bfe_i32 
v44, v3, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(3) +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v9 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1284 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v10 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1272 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v11 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1260 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v10, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1288 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v11, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1280 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v23, 16, v8 +; GCN-NEXT: v_bfe_i32 v19, v8, 0, 16 +; GCN-NEXT: v_bfe_i32 v59, v9, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(6) +; GCN-NEXT: v_lshrrev_b32_e32 v56, 16, v4 +; GCN-NEXT: s_waitcnt vmcnt(5) +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v12 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1268 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v13 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1252 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v14 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1236 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v15 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1220 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v12, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1276 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v13, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1264 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v14, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1256 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v15, 0, 16 +; GCN-NEXT: v_bfe_i32 v60, v4, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1244 ; 4-byte Folded Spill +; GCN-NEXT: v_mov_b32_e32 
v3, s4 +; GCN-NEXT: v_mov_b32_e32 v4, s5 +; GCN-NEXT: v_add_co_u32_e32 v12, vcc, v0, v3 +; GCN-NEXT: v_addc_co_u32_e32 v13, vcc, v1, v4, vcc +; GCN-NEXT: v_lshrrev_b32_e32 v57, 16, v5 +; GCN-NEXT: v_lshrrev_b32_e32 v58, 16, v6 +; GCN-NEXT: v_bfe_i32 v61, v5, 0, 16 +; GCN-NEXT: v_bfe_i32 v62, v6, 0, 16 +; GCN-NEXT: global_load_dwordx4 v[3:6], v[12:13], off +; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v7 +; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:1292 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v2, v7, 0, 16 +; GCN-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:32 +; GCN-NEXT: s_movk_i32 s4, 0xc0 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:1240 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v4 +; GCN-NEXT: v_bfe_i32 v3, v3, 0, 16 +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:1224 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v5 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1248 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v4, 0, 16 +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:1208 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v6 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1232 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v5, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1228 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v6, 0, 16 +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:1196 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:48 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1216 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1204 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, 
v5 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1192 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v6 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1184 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v7 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1172 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v4, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1212 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v5, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1200 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v6, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1188 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v7, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1180 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v8 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1176 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v9 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1168 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v10 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1152 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v11 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1140 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v8, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1156 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v9, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1160 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v10, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1164 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v11, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1136 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(17) +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v12 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 
offset:1144 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v13 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1132 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v14 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1128 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v15 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1100 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v12, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1148 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v13, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1116 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v14, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1120 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v3, v15, 0, 16 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:1124 ; 4-byte Folded Spill +; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_mov_b32_e32 v4, s5 +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc +; GCN-NEXT: global_load_dwordx4 v[3:6], v[0:1], off +; GCN-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:1112 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v4 +; GCN-NEXT: v_bfe_i32 v4, v4, 0, 16 +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:1104 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v5 +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:1108 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v4, v5, 0, 16 +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:1096 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v6 +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:1088 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v4, v6, 0, 16 +; GCN-NEXT: buffer_store_dword v7, off, 
s[0:3], s33 offset:1080 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:1092 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GCN-NEXT: v_bfe_i32 v3, v3, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:1084 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v8, 16, v5 +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:1076 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v8, 16, v6 +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:1048 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v8, 16, v7 +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:1044 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 +; GCN-NEXT: v_bfe_i32 v5, v5, 0, 16 +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:1068 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v5, v6, 0, 16 +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:1072 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v5, v7, 0, 16 +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:1040 ; 4-byte Folded Spill +; GCN-NEXT: v_bfe_i32 v0, v15, 0, 16 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1036 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GCN-NEXT: v_add_u32_e32 v0, 0x200, v0 +; GCN-NEXT: v_lshrrev_b32_e32 v6, 16, v15 +; GCN-NEXT: v_add_u32_e32 v15, 8, v0 +; GCN-NEXT: v_bfe_i32 v4, v4, 0, 16 +; GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v13 +; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v12 +; GCN-NEXT: v_bfe_i32 v12, v12, 0, 16 +; GCN-NEXT: v_bfe_i32 v1, v1, 0, 16 +; GCN-NEXT: v_bfe_i32 v13, v13, 0, 16 +; GCN-NEXT: v_bfe_i32 v7, v7, 0, 16 +; GCN-NEXT: v_bfe_i32 v6, v6, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(4) +; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v8 +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:1052 ; 4-byte Folded Spill +; GCN-NEXT: 
v_lshrrev_b32_e32 v5, 16, v9 +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:1056 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v10 +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:1060 ; 4-byte Folded Spill +; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v11 +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:1064 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v24, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v20, 0, 16 +; GCN-NEXT: v_add_u32_e32 v20, 12, v0 +; GCN-NEXT: buffer_store_dword v15, v20, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 16, v0 +; GCN-NEXT: buffer_store_dword v25, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v21, 0, 16 +; GCN-NEXT: v_add_u32_e32 v20, 20, v0 +; GCN-NEXT: buffer_store_dword v15, v20, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 24, v0 +; GCN-NEXT: buffer_store_dword v26, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v22, 0, 16 +; GCN-NEXT: v_add_u32_e32 v20, 28, v0 +; GCN-NEXT: buffer_store_dword v15, v20, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 32, v0 +; GCN-NEXT: buffer_store_dword v31, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v27, 0, 16 +; GCN-NEXT: v_add_u32_e32 v20, 36, v0 +; GCN-NEXT: buffer_store_dword v15, v20, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 40, v0 +; GCN-NEXT: buffer_store_dword v32, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v28, 0, 16 +; GCN-NEXT: v_add_u32_e32 v20, 44, v0 +; GCN-NEXT: buffer_store_dword v15, v20, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 48, v0 +; GCN-NEXT: buffer_store_dword v33, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v29, 0, 16 +; GCN-NEXT: v_add_u32_e32 v20, 52, v0 +; GCN-NEXT: buffer_store_dword v15, v20, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 56, v0 +; GCN-NEXT: buffer_store_dword v34, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v30, 0, 16 +; GCN-NEXT: v_add_u32_e32 v20, 60, v0 +; GCN-NEXT: buffer_store_dword v15, v20, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 
64, v0 +; GCN-NEXT: buffer_store_dword v39, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v35, 0, 16 +; GCN-NEXT: v_add_u32_e32 v20, 0x44, v0 +; GCN-NEXT: buffer_store_dword v15, v20, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0x48, v0 +; GCN-NEXT: buffer_store_dword v48, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v36, 0, 16 +; GCN-NEXT: v_add_u32_e32 v20, 0x4c, v0 +; GCN-NEXT: buffer_store_dword v15, v20, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0x50, v0 +; GCN-NEXT: buffer_store_dword v49, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v37, 0, 16 +; GCN-NEXT: v_add_u32_e32 v20, 0x54, v0 +; GCN-NEXT: buffer_store_dword v15, v20, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0x58, v0 +; GCN-NEXT: buffer_store_dword v50, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v38, 0, 16 +; GCN-NEXT: v_add_u32_e32 v20, 0x5c, v0 +; GCN-NEXT: buffer_store_dword v15, v20, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0x60, v0 +; GCN-NEXT: buffer_store_dword v55, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v51, 0, 16 +; GCN-NEXT: v_add_u32_e32 v20, 0x64, v0 +; GCN-NEXT: buffer_store_dword v15, v20, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0x68, v0 +; GCN-NEXT: buffer_store_dword v16, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v52, 0, 16 +; GCN-NEXT: v_add_u32_e32 v16, 0x6c, v0 +; GCN-NEXT: buffer_store_dword v15, v16, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0x70, v0 +; GCN-NEXT: buffer_store_dword v17, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v53, 0, 16 +; GCN-NEXT: v_add_u32_e32 v16, 0x74, v0 +; GCN-NEXT: buffer_store_dword v15, v16, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0x78, v0 +; GCN-NEXT: buffer_store_dword v18, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v54, 0, 16 +; GCN-NEXT: v_add_u32_e32 v16, 0x7c, v0 +; GCN-NEXT: buffer_store_dword v15, v16, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0x80, v0 +; GCN-NEXT: buffer_store_dword v44, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v40, 0, 16 
+; GCN-NEXT: v_add_u32_e32 v16, 0x84, v0 +; GCN-NEXT: buffer_store_dword v15, v16, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0x88, v0 +; GCN-NEXT: buffer_store_dword v45, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v41, 0, 16 +; GCN-NEXT: v_add_u32_e32 v16, 0x8c, v0 +; GCN-NEXT: buffer_store_dword v15, v16, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0x90, v0 +; GCN-NEXT: buffer_store_dword v46, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v42, 0, 16 +; GCN-NEXT: v_add_u32_e32 v16, 0x94, v0 +; GCN-NEXT: buffer_store_dword v15, v16, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0x98, v0 +; GCN-NEXT: buffer_store_dword v47, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v43, 0, 16 +; GCN-NEXT: v_add_u32_e32 v16, 0x9c, v0 +; GCN-NEXT: buffer_store_dword v15, v16, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0xa0, v0 +; GCN-NEXT: buffer_store_dword v60, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v56, 0, 16 +; GCN-NEXT: v_add_u32_e32 v16, 0xa4, v0 +; GCN-NEXT: buffer_store_dword v15, v16, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0xa8, v0 +; GCN-NEXT: buffer_store_dword v61, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v57, 0, 16 +; GCN-NEXT: v_add_u32_e32 v16, 0xac, v0 +; GCN-NEXT: buffer_store_dword v15, v16, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0xb0, v0 +; GCN-NEXT: buffer_store_dword v62, v15, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v15, v58, 0, 16 +; GCN-NEXT: v_add_u32_e32 v16, 0xb4, v0 +; GCN-NEXT: buffer_store_dword v15, v16, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v15, 0xb8, v0 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1292 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0xbc, v0 +; GCN-NEXT: v_add_u32_e32 v17, 0x12c, v0 +; GCN-NEXT: v_add_u32_e32 v18, 0x134, v0 +; GCN-NEXT: v_add_u32_e32 v20, 0x13c, v0 +; GCN-NEXT: v_add_u32_e32 v21, 0x14c, v0 +; GCN-NEXT: v_add_u32_e32 v22, 0x150, v0 +; GCN-NEXT: v_bfe_i32 v8, v8, 0, 16 +; 
GCN-NEXT: v_bfe_i32 v9, v9, 0, 16 +; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v14 +; GCN-NEXT: v_bfe_i32 v10, v10, 0, 16 +; GCN-NEXT: v_bfe_i32 v11, v11, 0, 16 +; GCN-NEXT: v_bfe_i32 v14, v14, 0, 16 +; GCN-NEXT: v_bfe_i32 v5, v5, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v2, 0xc0, v0 +; GCN-NEXT: buffer_store_dword v19, v2, s[0:3], 0 offen +; GCN-NEXT: v_bfe_i32 v2, v23, 0, 16 +; GCN-NEXT: v_add_u32_e32 v15, 0xc4, v0 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v2, 0xc8, v0 +; GCN-NEXT: buffer_store_dword v59, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1284 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0xcc, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:1288 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v2, 0xd0, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1272 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0xd4, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:1280 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v2, 0xd8, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1260 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0xdc, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:1276 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 
v2, 0xe0, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1268 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0xe4, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:1264 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v2, 0xe8, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1252 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0xec, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:1256 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v2, 0xf0, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1236 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0xf4, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:1244 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v2, 0xf8, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1220 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0xfc, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:1248 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v2, 0x100, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GCN-NEXT: 
buffer_load_dword v2, off, s[0:3], s33 offset:1240 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0x104, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:1232 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v2, 0x108, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1224 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0x10c, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:1228 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v2, 0x110, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1208 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0x114, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:1216 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v2, 0x118, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1196 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0x11c, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:1212 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v2, 0x120, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1204 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 
0x124, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:1200 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v15, 0x128, v0 +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1032 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: buffer_store_dword v16, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:1192 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: v_add_u32_e32 v15, 4, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v16, v16, 0, 16 +; GCN-NEXT: buffer_store_dword v16, v17, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:1184 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:1188 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v16, 0x130, v0 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: v_bfe_i32 v17, v17, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v19, v16, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:1172 ; 4-byte Folded Reload +; GCN-NEXT: buffer_store_dword v17, v18, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:1180 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v16, 0x138, v0 +; GCN-NEXT: v_add_u32_e32 v18, 0x144, v0 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_bfe_i32 v19, v19, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v17, v16, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v19, v20, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:1176 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:1156 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v16, 0x140, v0 +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:1168 ; 4-byte Folded Reload +; 
GCN-NEXT: v_add_u32_e32 v19, 0x148, v0 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_bfe_i32 v17, v17, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: buffer_store_dword v23, v16, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v17, v18, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:1160 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(3) +; GCN-NEXT: v_bfe_i32 v20, v20, 0, 16 +; GCN-NEXT: v_add_u32_e32 v17, 0x154, v0 +; GCN-NEXT: v_add_u32_e32 v18, 0x158, v0 +; GCN-NEXT: v_add_u32_e32 v23, 0x164, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v16, v19, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v20, v21, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:1164 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v20, 0x15c, v0 +; GCN-NEXT: v_add_u32_e32 v21, 0x160, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v16, v22, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:1152 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:1140 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:1144 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_bfe_i32 v16, v16, 0, 16 +; GCN-NEXT: buffer_store_dword v16, v17, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:1136 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(3) +; GCN-NEXT: v_bfe_i32 v19, v19, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_bfe_i32 v22, v22, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v16, v18, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v19, v20, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:1148 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v18, 0x16c, v0 +; GCN-NEXT: v_add_u32_e32 v19, 0x170, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v16, v21, s[0:3], 0 offen +; GCN-NEXT: 
buffer_store_dword v22, v23, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:1132 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:1116 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v16, 0x168, v0 +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:1128 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v21, 0x174, v0 +; GCN-NEXT: v_add_u32_e32 v22, 0x178, v0 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_bfe_i32 v17, v17, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: buffer_store_dword v23, v16, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v17, v18, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:1120 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(3) +; GCN-NEXT: v_bfe_i32 v20, v20, 0, 16 +; GCN-NEXT: v_add_u32_e32 v17, 0x17c, v0 +; GCN-NEXT: v_add_u32_e32 v18, 0x180, v0 +; GCN-NEXT: v_add_u32_e32 v23, 0x18c, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v16, v19, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v20, v21, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:1124 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v20, 0x184, v0 +; GCN-NEXT: v_add_u32_e32 v21, 0x188, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v16, v22, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:1100 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:1112 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:1104 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_bfe_i32 v16, v16, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: v_bfe_i32 v19, v19, 0, 16 +; GCN-NEXT: buffer_store_dword v16, v17, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v19, v20, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1108 ; 
4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(4) +; GCN-NEXT: v_bfe_i32 v22, v22, 0, 16 +; GCN-NEXT: v_add_u32_e32 v17, 0x194, v0 +; GCN-NEXT: v_add_u32_e32 v18, 0x198, v0 +; GCN-NEXT: v_add_u32_e32 v20, 0x19c, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v3, v21, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v22, v23, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:1096 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:1088 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v3, 0x190, v0 +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:1080 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v21, 0x1a0, v0 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_bfe_i32 v16, v16, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: buffer_store_dword v22, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v16, v17, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1092 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(3) +; GCN-NEXT: v_bfe_i32 v19, v19, 0, 16 +; GCN-NEXT: v_add_u32_e32 v16, 0x1a8, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v19, v20, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v4, v21, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1084 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v4, 0x1a4, v0 +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:1076 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:1048 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v18, 0x1ac, v0 +; GCN-NEXT: v_add_u32_e32 v19, 0x1b0, v0 +; GCN-NEXT: v_add_u32_e32 v21, 0x1b4, v0 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_bfe_i32 v3, v3, 0, 16 +; GCN-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1068 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt 
vmcnt(3) +; GCN-NEXT: v_bfe_i32 v17, v17, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_bfe_i32 v20, v20, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v3, v16, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v17, v18, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1072 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v18, 0x1bc, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v3, v19, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v20, v21, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v2, v15, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1028 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:1052 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:1056 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:1040 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:1060 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:1064 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v3, 0x1b8, v0 +; GCN-NEXT: v_add_u32_e32 v19, 0x1c0, v0 +; GCN-NEXT: v_add_u32_e32 v20, 0x1c4, v0 +; GCN-NEXT: v_add_u32_e32 v21, 0x1c8, v0 +; GCN-NEXT: s_waitcnt vmcnt(5) +; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:512 +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1044 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(6) +; GCN-NEXT: v_bfe_i32 v4, v4, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(5) +; GCN-NEXT: v_bfe_i32 v15, v15, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(3) +; GCN-NEXT: v_bfe_i32 v16, v16, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_bfe_i32 v17, v17, 0, 16 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: buffer_store_dword v22, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v2, v18, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v8, v19, s[0:3], 0 offen +; 
GCN-NEXT: buffer_store_dword v4, v20, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v9, v21, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v2, 0x1cc, v0 +; GCN-NEXT: v_add_u32_e32 v3, 0x1d0, v0 +; GCN-NEXT: v_add_u32_e32 v4, 0x1d4, v0 +; GCN-NEXT: v_add_u32_e32 v8, 0x1d8, v0 +; GCN-NEXT: v_add_u32_e32 v9, 0x1dc, v0 +; GCN-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v10, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v16, v4, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v11, v8, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v17, v9, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v2, 0x1e0, v0 +; GCN-NEXT: v_add_u32_e32 v3, 0x1e4, v0 +; GCN-NEXT: v_add_u32_e32 v4, 0x1e8, v0 +; GCN-NEXT: v_add_u32_e32 v8, 0x1ec, v0 +; GCN-NEXT: v_add_u32_e32 v9, 0x1f0, v0 +; GCN-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v7, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v13, v4, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v1, v8, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v14, v9, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x1f4, v0 +; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:1036 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v2, 0x1f8, v0 +; GCN-NEXT: v_add_u32_e32 v3, 0x1fc, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:1024 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_and_b32_e32 v1, 0x7f, v1 +; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GCN-NEXT: v_add_u32_e32 v0, v0, v1 +; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v60, off, 
s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b32 s33, s6 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %vec = load <128 x i16>, <128 x i16> addrspace(1)* %ptr + %elt = extractelement <128 x i16> %vec, i32 %idx + ret i16 %elt +} + +define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { +; GCN-LABEL: v_extract_v32i64_varidx: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v17, v0 +; GCN-NEXT: s_add_u32 s4, s32, 0x3fc0 +; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: s_mov_b32 s6, s33 +; GCN-NEXT: s_and_b32 s33, s4, 0xffffc000 +; GCN-NEXT: s_movk_i32 s4, 0x80 +; GCN-NEXT: v_mov_b32_e32 v16, s5 +; GCN-NEXT: v_mov_b32_e32 v18, v1 +; GCN-NEXT: v_add_co_u32_e32 v31, vcc, 64, v17 +; GCN-NEXT: v_mov_b32_e32 v15, s4 +; GCN-NEXT: v_addc_co_u32_e32 v32, vcc, 0, v18, vcc +; GCN-NEXT: v_add_co_u32_e32 v48, vcc, v17, v15 +; GCN-NEXT: v_addc_co_u32_e32 v49, vcc, 
v18, v16, vcc +; GCN-NEXT: s_movk_i32 s4, 0xc0 +; GCN-NEXT: v_mov_b32_e32 v16, s5 +; GCN-NEXT: v_mov_b32_e32 v15, s4 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_add_co_u32_e32 v59, vcc, v17, v15 +; GCN-NEXT: global_load_dwordx4 v[3:6], v[17:18], off +; GCN-NEXT: global_load_dwordx4 v[7:10], v[17:18], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[11:14], v[17:18], off offset:32 +; GCN-NEXT: v_addc_co_u32_e32 v60, vcc, v18, v16, vcc +; GCN-NEXT: global_load_dwordx4 v[15:18], v[17:18], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[19:22], v[31:32], off +; GCN-NEXT: global_load_dwordx4 v[23:26], v[31:32], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[27:30], v[31:32], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[31:34], v[31:32], 
off offset:48 +; GCN-NEXT: global_load_dwordx4 v[35:38], v[48:49], off +; GCN-NEXT: global_load_dwordx4 v[39:42], v[48:49], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[43:46], v[48:49], off offset:32 +; GCN-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GCN-NEXT: v_add_u32_e32 v0, 0x100, v0 +; GCN-NEXT: v_add_u32_e32 v1, 16, v0 +; GCN-NEXT: s_add_u32 s32, s32, 0x10000 +; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[47:50], v[48:49], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; 
GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[51:54], v[59:60], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[55:58], v[59:60], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[59:62], v[59:60], off offset:48 +; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 20, v0 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 24, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 28, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 32, v0 +; GCN-NEXT: buffer_store_dword 
v11, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 36, v0 +; GCN-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 40, v0 +; GCN-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 44, v0 +; GCN-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 48, v0 +; GCN-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 52, v0 +; GCN-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 56, v0 +; GCN-NEXT: buffer_store_dword v17, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 60, v0 +; GCN-NEXT: buffer_store_dword v18, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 64, v0 +; GCN-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x44, v0 +; GCN-NEXT: buffer_store_dword v20, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x48, v0 +; GCN-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x4c, v0 +; GCN-NEXT: buffer_store_dword v22, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x50, v0 +; GCN-NEXT: buffer_store_dword v23, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x54, v0 +; GCN-NEXT: buffer_store_dword v24, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x58, v0 +; GCN-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x5c, v0 +; GCN-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x60, v0 +; GCN-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x64, v0 +; GCN-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x68, v0 +; GCN-NEXT: buffer_store_dword v29, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x6c, v0 +; GCN-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x70, v0 +; GCN-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x74, v0 +; 
GCN-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x78, v0 +; GCN-NEXT: buffer_store_dword v33, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x7c, v0 +; GCN-NEXT: buffer_store_dword v34, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x80, v0 +; GCN-NEXT: buffer_store_dword v35, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x84, v0 +; GCN-NEXT: buffer_store_dword v36, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x88, v0 +; GCN-NEXT: buffer_store_dword v37, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x8c, v0 +; GCN-NEXT: buffer_store_dword v38, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x90, v0 +; GCN-NEXT: buffer_store_dword v39, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x94, v0 +; GCN-NEXT: buffer_store_dword v40, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x98, v0 +; GCN-NEXT: buffer_store_dword v41, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0x9c, v0 +; GCN-NEXT: buffer_store_dword v42, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: 
buffer_load_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v1, 0xa0, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v15 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v9, v16 +; GCN-NEXT: v_add_u32_e32 v1, 0xa4, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v10, v17 +; GCN-NEXT: v_add_u32_e32 v1, 0xa8, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v11, v18 +; GCN-NEXT: v_add_u32_e32 v1, 0xac, v0 +; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb0, v0 +; GCN-NEXT: buffer_store_dword v47, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb4, v0 +; GCN-NEXT: buffer_store_dword v48, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xb8, v0 +; GCN-NEXT: buffer_store_dword v49, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xbc, v0 +; GCN-NEXT: buffer_store_dword v50, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: 
buffer_load_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v1, 0xc0, v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xc4, v0 +; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xc8, v0 +; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xcc, v0 +; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 4, v0 +; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 8, v0 +; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 12, v0 +; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:256 +; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v0 +; GCN-NEXT: buffer_store_dword v51, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xd4, v0 +; GCN-NEXT: buffer_store_dword v52, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xd8, v0 +; GCN-NEXT: buffer_store_dword v53, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xdc, v0 +; GCN-NEXT: buffer_store_dword v54, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xe0, v0 +; GCN-NEXT: buffer_store_dword v55, v1, 
s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xe4, v0 +; GCN-NEXT: buffer_store_dword v56, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xe8, v0 +; GCN-NEXT: buffer_store_dword v57, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xec, v0 +; GCN-NEXT: buffer_store_dword v58, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xf0, v0 +; GCN-NEXT: buffer_store_dword v59, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xf4, v0 +; GCN-NEXT: buffer_store_dword v60, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xf8, v0 +; GCN-NEXT: buffer_store_dword v61, v1, s[0:3], 0 offen +; GCN-NEXT: v_add_u32_e32 v1, 0xfc, v0 +; GCN-NEXT: buffer_store_dword v62, v1, s[0:3], 0 offen +; GCN-NEXT: v_and_b32_e32 v1, 31, v2 +; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GCN-NEXT: v_add_u32_e32 v0, v0, v1 +; GCN-NEXT: v_add_u32_e32 v1, 4, v0 +; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GCN-NEXT: 
buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b32 s33, s6 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %vec = load <32 x i64>, <32 x i64> addrspace(1)* %ptr + %elt = extractelement <32 x i64> %vec, i32 %idx + ret i64 %elt +} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -2495,3 +2495,125 @@ store double %ext, double addrspace(1)* %out ret void } + +define i32 @v_extract_v64i32_7(<64 x i32> addrspace(1)* %ptr) { +; GPRIDX-LABEL: v_extract_v64i32_7: +; GPRIDX: ; %bb.0: +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GPRIDX-NEXT: s_waitcnt vmcnt(0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v7 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: v_extract_v64i32_7: +; MOVREL: ; %bb.0: +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; MOVREL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; MOVREL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; MOVREL-NEXT: v_mov_b32_e32 v0, v7 +; MOVREL-NEXT: s_setpc_b64 s[30:31] + %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %elt = extractelement <64 x i32> %vec, i32 7 + ret i32 %elt +} + +define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) { +; GPRIDX-LABEL: v_extract_v64i32_32: +; GPRIDX: ; %bb.0: +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_movk_i32 s4, 0x80 +; GPRIDX-NEXT: s_mov_b32 s5, 0 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s5 +; GPRIDX-NEXT: 
v_add_co_u32_e32 v0, vcc, v0, v2 +; GPRIDX-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GPRIDX-NEXT: s_waitcnt vmcnt(0) +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: v_extract_v64i32_32: +; MOVREL: ; %bb.0: +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_movk_i32 s4, 0x80 +; MOVREL-NEXT: s_mov_b32 s5, 0 +; MOVREL-NEXT: v_mov_b32_e32 v2, s4 +; MOVREL-NEXT: v_mov_b32_e32 v3, s5 +; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; MOVREL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_setpc_b64 s[30:31] + %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %elt = extractelement <64 x i32> %vec, i32 32 + ret i32 %elt +} + +define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) { +; GPRIDX-LABEL: v_extract_v64i32_33: +; GPRIDX: ; %bb.0: +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_movk_i32 s4, 0x80 +; GPRIDX-NEXT: s_mov_b32 s5, 0 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s5 +; GPRIDX-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GPRIDX-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GPRIDX-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GPRIDX-NEXT: s_waitcnt vmcnt(0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: v_extract_v64i32_33: +; MOVREL: ; %bb.0: +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_movk_i32 s4, 0x80 +; MOVREL-NEXT: s_mov_b32 s5, 0 +; MOVREL-NEXT: v_mov_b32_e32 v2, s4 +; MOVREL-NEXT: v_mov_b32_e32 v3, s5 +; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; MOVREL-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; MOVREL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; MOVREL-NEXT: v_mov_b32_e32 v0, v1 +; MOVREL-NEXT: s_setpc_b64 s[30:31] + %vec = load <64 x i32>, <64 x i32> addrspace(1)* 
%ptr + %elt = extractelement <64 x i32> %vec, i32 33 + ret i32 %elt +} + +define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) { +; GPRIDX-LABEL: v_extract_v64i32_37: +; GPRIDX: ; %bb.0: +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_movk_i32 s4, 0x80 +; GPRIDX-NEXT: s_mov_b32 s5, 0 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s5 +; GPRIDX-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GPRIDX-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GPRIDX-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GPRIDX-NEXT: s_waitcnt vmcnt(0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v5 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: v_extract_v64i32_37: +; MOVREL: ; %bb.0: +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_movk_i32 s4, 0x80 +; MOVREL-NEXT: s_mov_b32 s5, 0 +; MOVREL-NEXT: v_mov_b32_e32 v2, s4 +; MOVREL-NEXT: v_mov_b32_e32 v3, s5 +; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; MOVREL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; MOVREL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; MOVREL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; MOVREL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; MOVREL-NEXT: v_mov_b32_e32 v0, v5 +; MOVREL-NEXT: s_setpc_b64 s[30:31] + %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %elt = extractelement <64 x i32> %vec, i32 37 + ret i32 %elt +} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -714,208 +714,286 @@ ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 + 192, align 4, addrspace 4) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = 
G_EXTRACT [[LOAD]](<16 x s32>), 224 + ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_CONSTANT i32 7 + %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 + S_ENDPGM 0, implicit %3 +... + +--- +name: extract_vector_elt_33_v64s32 + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: extract_vector_elt_33_v64s32 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 4, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 64 + 64, align 4, addrspace 4) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 64 + 128, align 4, addrspace 4) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 + 192, align 4, addrspace 4) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](<16 x s32>), 32 + ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_CONSTANT i32 33 + %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +# Test handling of out of bounds indexes +--- +name: extract_vector_elt_64_65_v64s32 + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: extract_vector_elt_64_65_v64s32 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK: S_ENDPGM 0, implicit [[COPY1]](s32), implicit [[DEF]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_CONSTANT i32 64 + %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 + %4:_(s32) = G_CONSTANT i32 65 + %5:_(s32) = G_EXTRACT_VECTOR_ELT %2, %4 + S_ENDPGM 0, implicit %3, implicit %5 +... + +--- +name: extract_vector_elt_33_v64p3 + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: extract_vector_elt_33_v64p3 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x p3>) = G_LOAD [[COPY]](p1) :: (load 64, align 4, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x p3>) = G_LOAD [[PTR_ADD]](p1) :: (load 64 + 64, align 4, addrspace 4) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x p3>) = G_LOAD [[PTR_ADD1]](p1) :: (load 64 + 128, align 4, addrspace 4) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x p3>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 + 192, align 4, addrspace 4) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), 
[[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) - ; CHECK: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store 4 into %stack.0, align 256, addrspace 5) + ; CHECK: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3), [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3), [[UV4:%[0-9]+]]:_(p3), [[UV5:%[0-9]+]]:_(p3), [[UV6:%[0-9]+]]:_(p3), [[UV7:%[0-9]+]]:_(p3), [[UV8:%[0-9]+]]:_(p3), [[UV9:%[0-9]+]]:_(p3), [[UV10:%[0-9]+]]:_(p3), [[UV11:%[0-9]+]]:_(p3), [[UV12:%[0-9]+]]:_(p3), 
[[UV13:%[0-9]+]]:_(p3), [[UV14:%[0-9]+]]:_(p3), [[UV15:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[LOAD]](<16 x p3>) + ; CHECK: [[UV16:%[0-9]+]]:_(p3), [[UV17:%[0-9]+]]:_(p3), [[UV18:%[0-9]+]]:_(p3), [[UV19:%[0-9]+]]:_(p3), [[UV20:%[0-9]+]]:_(p3), [[UV21:%[0-9]+]]:_(p3), [[UV22:%[0-9]+]]:_(p3), [[UV23:%[0-9]+]]:_(p3), [[UV24:%[0-9]+]]:_(p3), [[UV25:%[0-9]+]]:_(p3), [[UV26:%[0-9]+]]:_(p3), [[UV27:%[0-9]+]]:_(p3), [[UV28:%[0-9]+]]:_(p3), [[UV29:%[0-9]+]]:_(p3), [[UV30:%[0-9]+]]:_(p3), [[UV31:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[LOAD1]](<16 x p3>) + ; CHECK: [[UV32:%[0-9]+]]:_(p3), [[UV33:%[0-9]+]]:_(p3), [[UV34:%[0-9]+]]:_(p3), [[UV35:%[0-9]+]]:_(p3), [[UV36:%[0-9]+]]:_(p3), [[UV37:%[0-9]+]]:_(p3), [[UV38:%[0-9]+]]:_(p3), [[UV39:%[0-9]+]]:_(p3), [[UV40:%[0-9]+]]:_(p3), [[UV41:%[0-9]+]]:_(p3), [[UV42:%[0-9]+]]:_(p3), [[UV43:%[0-9]+]]:_(p3), [[UV44:%[0-9]+]]:_(p3), [[UV45:%[0-9]+]]:_(p3), [[UV46:%[0-9]+]]:_(p3), [[UV47:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[LOAD2]](<16 x p3>) + ; CHECK: [[UV48:%[0-9]+]]:_(p3), [[UV49:%[0-9]+]]:_(p3), [[UV50:%[0-9]+]]:_(p3), [[UV51:%[0-9]+]]:_(p3), [[UV52:%[0-9]+]]:_(p3), [[UV53:%[0-9]+]]:_(p3), [[UV54:%[0-9]+]]:_(p3), [[UV55:%[0-9]+]]:_(p3), [[UV56:%[0-9]+]]:_(p3), [[UV57:%[0-9]+]]:_(p3), [[UV58:%[0-9]+]]:_(p3), [[UV59:%[0-9]+]]:_(p3), [[UV60:%[0-9]+]]:_(p3), [[UV61:%[0-9]+]]:_(p3), [[UV62:%[0-9]+]]:_(p3), [[UV63:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[LOAD3]](<16 x p3>) + ; CHECK: G_STORE [[UV]](p3), [[FRAME_INDEX]](p5) :: (store 4 into %stack.0, align 256, addrspace 5) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) - ; CHECK: G_STORE [[UV1]](s32), [[PTR_ADD3]](p5) :: (store 4 into %stack.0 + 4, align 256, addrspace 5) + ; CHECK: G_STORE [[UV1]](p3), [[PTR_ADD3]](p5) :: (store 4 into %stack.0 + 4, align 256, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) - ; CHECK: G_STORE 
[[UV2]](s32), [[PTR_ADD4]](p5) :: (store 4 into %stack.0 + 8, align 256, addrspace 5) + ; CHECK: G_STORE [[UV2]](p3), [[PTR_ADD4]](p5) :: (store 4 into %stack.0 + 8, align 256, addrspace 5) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK: G_STORE [[UV3]](s32), [[PTR_ADD5]](p5) :: (store 4 into %stack.0 + 12, align 256, addrspace 5) + ; CHECK: G_STORE [[UV3]](p3), [[PTR_ADD5]](p5) :: (store 4 into %stack.0 + 12, align 256, addrspace 5) ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK: G_STORE [[UV4]](s32), [[PTR_ADD6]](p5) :: (store 4 into %stack.0 + 16, align 256, addrspace 5) + ; CHECK: G_STORE [[UV4]](p3), [[PTR_ADD6]](p5) :: (store 4 into %stack.0 + 16, align 256, addrspace 5) ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK: G_STORE [[UV5]](s32), [[PTR_ADD7]](p5) :: (store 4 into %stack.0 + 20, align 256, addrspace 5) + ; CHECK: G_STORE [[UV5]](p3), [[PTR_ADD7]](p5) :: (store 4 into %stack.0 + 20, align 256, addrspace 5) ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK: G_STORE [[UV6]](s32), [[PTR_ADD8]](p5) :: (store 4 into %stack.0 + 24, align 256, addrspace 5) + ; CHECK: G_STORE [[UV6]](p3), [[PTR_ADD8]](p5) :: (store 4 into %stack.0 + 24, align 256, addrspace 5) ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD9]](p5) - ; CHECK: G_STORE [[UV7]](s32), [[COPY1]](p5) :: (store 4 into %stack.0 + 28, align 256, addrspace 5) + ; CHECK: G_STORE [[UV7]](p3), [[PTR_ADD9]](p5) :: (store 4 into %stack.0 + 28, align 256, addrspace 5) ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; 
CHECK: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK: G_STORE [[UV8]](s32), [[PTR_ADD10]](p5) :: (store 4 into %stack.0 + 32, align 256, addrspace 5) + ; CHECK: G_STORE [[UV8]](p3), [[PTR_ADD10]](p5) :: (store 4 into %stack.0 + 32, align 256, addrspace 5) ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK: G_STORE [[UV9]](s32), [[PTR_ADD11]](p5) :: (store 4 into %stack.0 + 36, align 256, addrspace 5) + ; CHECK: G_STORE [[UV9]](p3), [[PTR_ADD11]](p5) :: (store 4 into %stack.0 + 36, align 256, addrspace 5) ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK: G_STORE [[UV10]](s32), [[PTR_ADD12]](p5) :: (store 4 into %stack.0 + 40, align 256, addrspace 5) + ; CHECK: G_STORE [[UV10]](p3), [[PTR_ADD12]](p5) :: (store 4 into %stack.0 + 40, align 256, addrspace 5) ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK: G_STORE [[UV11]](s32), [[PTR_ADD13]](p5) :: (store 4 into %stack.0 + 44, align 256, addrspace 5) + ; CHECK: G_STORE [[UV11]](p3), [[PTR_ADD13]](p5) :: (store 4 into %stack.0 + 44, align 256, addrspace 5) ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK: G_STORE [[UV12]](s32), [[PTR_ADD14]](p5) :: (store 4 into %stack.0 + 48, align 256, addrspace 5) + ; CHECK: G_STORE [[UV12]](p3), [[PTR_ADD14]](p5) :: (store 4 into %stack.0 + 48, align 256, addrspace 5) ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK: G_STORE [[UV13]](s32), [[PTR_ADD15]](p5) :: (store 4 into %stack.0 + 52, align 256, addrspace 5) + ; CHECK: G_STORE [[UV13]](p3), [[PTR_ADD15]](p5) :: (store 4 into %stack.0 + 52, align 
256, addrspace 5) ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK: G_STORE [[UV14]](s32), [[PTR_ADD16]](p5) :: (store 4 into %stack.0 + 56, align 256, addrspace 5) + ; CHECK: G_STORE [[UV14]](p3), [[PTR_ADD16]](p5) :: (store 4 into %stack.0 + 56, align 256, addrspace 5) ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK: G_STORE [[UV15]](s32), [[PTR_ADD17]](p5) :: (store 4 into %stack.0 + 60, align 256, addrspace 5) + ; CHECK: G_STORE [[UV15]](p3), [[PTR_ADD17]](p5) :: (store 4 into %stack.0 + 60, align 256, addrspace 5) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK: G_STORE [[UV16]](s32), [[PTR_ADD18]](p5) :: (store 4 into %stack.0 + 64, align 256, addrspace 5) + ; CHECK: G_STORE [[UV16]](p3), [[PTR_ADD18]](p5) :: (store 4 into %stack.0 + 64, align 256, addrspace 5) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK: G_STORE [[UV17]](s32), [[PTR_ADD19]](p5) :: (store 4 into %stack.0 + 68, align 256, addrspace 5) + ; CHECK: G_STORE [[UV17]](p3), [[PTR_ADD19]](p5) :: (store 4 into %stack.0 + 68, align 256, addrspace 5) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) - ; CHECK: G_STORE [[UV18]](s32), [[PTR_ADD20]](p5) :: (store 4 into %stack.0 + 72, align 256, addrspace 5) + ; CHECK: G_STORE [[UV18]](p3), [[PTR_ADD20]](p5) :: (store 4 into %stack.0 + 72, align 256, addrspace 5) ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK: G_STORE [[UV19]](s32), [[PTR_ADD21]](p5) :: (store 4 into %stack.0 + 76, align 256, addrspace 5) + ; CHECK: 
G_STORE [[UV19]](p3), [[PTR_ADD21]](p5) :: (store 4 into %stack.0 + 76, align 256, addrspace 5) ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK: G_STORE [[UV20]](s32), [[PTR_ADD22]](p5) :: (store 4 into %stack.0 + 80, align 256, addrspace 5) + ; CHECK: G_STORE [[UV20]](p3), [[PTR_ADD22]](p5) :: (store 4 into %stack.0 + 80, align 256, addrspace 5) ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK: G_STORE [[UV21]](s32), [[PTR_ADD23]](p5) :: (store 4 into %stack.0 + 84, align 256, addrspace 5) + ; CHECK: G_STORE [[UV21]](p3), [[PTR_ADD23]](p5) :: (store 4 into %stack.0 + 84, align 256, addrspace 5) ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK: G_STORE [[UV22]](s32), [[PTR_ADD24]](p5) :: (store 4 into %stack.0 + 88, align 256, addrspace 5) + ; CHECK: G_STORE [[UV22]](p3), [[PTR_ADD24]](p5) :: (store 4 into %stack.0 + 88, align 256, addrspace 5) ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK: G_STORE [[UV23]](s32), [[PTR_ADD25]](p5) :: (store 4 into %stack.0 + 92, align 256, addrspace 5) + ; CHECK: G_STORE [[UV23]](p3), [[PTR_ADD25]](p5) :: (store 4 into %stack.0 + 92, align 256, addrspace 5) ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK: G_STORE [[UV24]](s32), [[PTR_ADD26]](p5) :: (store 4 into %stack.0 + 96, align 256, addrspace 5) + ; CHECK: G_STORE [[UV24]](p3), [[PTR_ADD26]](p5) :: (store 4 into %stack.0 + 96, align 256, addrspace 5) ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK: G_STORE [[UV25]](s32), 
[[PTR_ADD27]](p5) :: (store 4 into %stack.0 + 100, align 256, addrspace 5) + ; CHECK: G_STORE [[UV25]](p3), [[PTR_ADD27]](p5) :: (store 4 into %stack.0 + 100, align 256, addrspace 5) ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK: G_STORE [[UV26]](s32), [[PTR_ADD28]](p5) :: (store 4 into %stack.0 + 104, align 256, addrspace 5) + ; CHECK: G_STORE [[UV26]](p3), [[PTR_ADD28]](p5) :: (store 4 into %stack.0 + 104, align 256, addrspace 5) ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK: G_STORE [[UV27]](s32), [[PTR_ADD29]](p5) :: (store 4 into %stack.0 + 108, align 256, addrspace 5) + ; CHECK: G_STORE [[UV27]](p3), [[PTR_ADD29]](p5) :: (store 4 into %stack.0 + 108, align 256, addrspace 5) ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 ; CHECK: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK: G_STORE [[UV28]](s32), [[PTR_ADD30]](p5) :: (store 4 into %stack.0 + 112, align 256, addrspace 5) + ; CHECK: G_STORE [[UV28]](p3), [[PTR_ADD30]](p5) :: (store 4 into %stack.0 + 112, align 256, addrspace 5) ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 ; CHECK: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK: G_STORE [[UV29]](s32), [[PTR_ADD31]](p5) :: (store 4 into %stack.0 + 116, align 256, addrspace 5) + ; CHECK: G_STORE [[UV29]](p3), [[PTR_ADD31]](p5) :: (store 4 into %stack.0 + 116, align 256, addrspace 5) ; CHECK: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 ; CHECK: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK: G_STORE [[UV30]](s32), [[PTR_ADD32]](p5) :: (store 4 into %stack.0 + 120, align 256, addrspace 5) + ; CHECK: G_STORE [[UV30]](p3), [[PTR_ADD32]](p5) :: (store 4 into %stack.0 + 120, align 256, addrspace 5) ; CHECK: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 ; CHECK: 
[[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD33]](p5) :: (store 4 into %stack.0 + 124, align 256, addrspace 5) + ; CHECK: G_STORE [[UV31]](p3), [[PTR_ADD33]](p5) :: (store 4 into %stack.0 + 124, align 256, addrspace 5) ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD34]](p5) :: (store 4 into %stack.0 + 128, align 256, addrspace 5) + ; CHECK: G_STORE [[UV32]](p3), [[PTR_ADD34]](p5) :: (store 4 into %stack.0 + 128, align 256, addrspace 5) ; CHECK: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 ; CHECK: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD35]](p5) :: (store 4 into %stack.0 + 132, align 256, addrspace 5) + ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) + ; CHECK: G_STORE [[UV33]](p3), [[COPY1]](p5) :: (store 4 into %stack.0 + 132, align 256, addrspace 5) ; CHECK: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 ; CHECK: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD36]](p5) :: (store 4 into %stack.0 + 136, align 256, addrspace 5) + ; CHECK: G_STORE [[UV34]](p3), [[PTR_ADD36]](p5) :: (store 4 into %stack.0 + 136, align 256, addrspace 5) ; CHECK: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 ; CHECK: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD37]](p5) :: (store 4 into %stack.0 + 140, align 256, addrspace 5) + ; CHECK: G_STORE [[UV35]](p3), [[PTR_ADD37]](p5) :: (store 4 into %stack.0 + 140, align 256, addrspace 5) ; CHECK: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 ; CHECK: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK: G_STORE [[UV36]](s32), [[PTR_ADD38]](p5) :: (store 4 into %stack.0 + 144, align 256, addrspace 5) + ; CHECK: G_STORE 
[[UV36]](p3), [[PTR_ADD38]](p5) :: (store 4 into %stack.0 + 144, align 256, addrspace 5) ; CHECK: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 ; CHECK: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK: G_STORE [[UV37]](s32), [[PTR_ADD39]](p5) :: (store 4 into %stack.0 + 148, align 256, addrspace 5) + ; CHECK: G_STORE [[UV37]](p3), [[PTR_ADD39]](p5) :: (store 4 into %stack.0 + 148, align 256, addrspace 5) ; CHECK: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 ; CHECK: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK: G_STORE [[UV38]](s32), [[PTR_ADD40]](p5) :: (store 4 into %stack.0 + 152, align 256, addrspace 5) + ; CHECK: G_STORE [[UV38]](p3), [[PTR_ADD40]](p5) :: (store 4 into %stack.0 + 152, align 256, addrspace 5) ; CHECK: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 ; CHECK: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK: G_STORE [[UV39]](s32), [[PTR_ADD41]](p5) :: (store 4 into %stack.0 + 156, align 256, addrspace 5) + ; CHECK: G_STORE [[UV39]](p3), [[PTR_ADD41]](p5) :: (store 4 into %stack.0 + 156, align 256, addrspace 5) ; CHECK: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 ; CHECK: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK: G_STORE [[UV40]](s32), [[PTR_ADD42]](p5) :: (store 4 into %stack.0 + 160, align 256, addrspace 5) + ; CHECK: G_STORE [[UV40]](p3), [[PTR_ADD42]](p5) :: (store 4 into %stack.0 + 160, align 256, addrspace 5) ; CHECK: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 ; CHECK: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK: G_STORE [[UV41]](s32), [[PTR_ADD43]](p5) :: (store 4 into %stack.0 + 164, align 256, addrspace 5) + ; CHECK: G_STORE [[UV41]](p3), [[PTR_ADD43]](p5) :: (store 4 into %stack.0 + 164, align 256, addrspace 5) ; CHECK: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 ; CHECK: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK: G_STORE [[UV42]](s32), 
[[PTR_ADD44]](p5) :: (store 4 into %stack.0 + 168, align 256, addrspace 5) + ; CHECK: G_STORE [[UV42]](p3), [[PTR_ADD44]](p5) :: (store 4 into %stack.0 + 168, align 256, addrspace 5) ; CHECK: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 ; CHECK: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK: G_STORE [[UV43]](s32), [[PTR_ADD45]](p5) :: (store 4 into %stack.0 + 172, align 256, addrspace 5) + ; CHECK: G_STORE [[UV43]](p3), [[PTR_ADD45]](p5) :: (store 4 into %stack.0 + 172, align 256, addrspace 5) ; CHECK: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 ; CHECK: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) - ; CHECK: G_STORE [[UV44]](s32), [[PTR_ADD46]](p5) :: (store 4 into %stack.0 + 176, align 256, addrspace 5) + ; CHECK: G_STORE [[UV44]](p3), [[PTR_ADD46]](p5) :: (store 4 into %stack.0 + 176, align 256, addrspace 5) ; CHECK: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 ; CHECK: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK: G_STORE [[UV45]](s32), [[PTR_ADD47]](p5) :: (store 4 into %stack.0 + 180, align 256, addrspace 5) + ; CHECK: G_STORE [[UV45]](p3), [[PTR_ADD47]](p5) :: (store 4 into %stack.0 + 180, align 256, addrspace 5) ; CHECK: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 ; CHECK: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK: G_STORE [[UV46]](s32), [[PTR_ADD48]](p5) :: (store 4 into %stack.0 + 184, align 256, addrspace 5) + ; CHECK: G_STORE [[UV46]](p3), [[PTR_ADD48]](p5) :: (store 4 into %stack.0 + 184, align 256, addrspace 5) ; CHECK: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 ; CHECK: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK: G_STORE [[UV47]](s32), [[PTR_ADD49]](p5) :: (store 4 into %stack.0 + 188, align 256, addrspace 5) + ; CHECK: G_STORE [[UV47]](p3), [[PTR_ADD49]](p5) :: (store 4 into %stack.0 + 188, align 256, addrspace 5) ; CHECK: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 ; CHECK: 
[[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) - ; CHECK: G_STORE [[UV48]](s32), [[PTR_ADD50]](p5) :: (store 4 into %stack.0 + 192, align 256, addrspace 5) + ; CHECK: G_STORE [[UV48]](p3), [[PTR_ADD50]](p5) :: (store 4 into %stack.0 + 192, align 256, addrspace 5) ; CHECK: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 ; CHECK: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; CHECK: G_STORE [[UV49]](s32), [[PTR_ADD51]](p5) :: (store 4 into %stack.0 + 196, align 256, addrspace 5) + ; CHECK: G_STORE [[UV49]](p3), [[PTR_ADD51]](p5) :: (store 4 into %stack.0 + 196, align 256, addrspace 5) ; CHECK: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 ; CHECK: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK: G_STORE [[UV50]](s32), [[PTR_ADD52]](p5) :: (store 4 into %stack.0 + 200, align 256, addrspace 5) + ; CHECK: G_STORE [[UV50]](p3), [[PTR_ADD52]](p5) :: (store 4 into %stack.0 + 200, align 256, addrspace 5) ; CHECK: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 ; CHECK: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) - ; CHECK: G_STORE [[UV51]](s32), [[PTR_ADD53]](p5) :: (store 4 into %stack.0 + 204, align 256, addrspace 5) + ; CHECK: G_STORE [[UV51]](p3), [[PTR_ADD53]](p5) :: (store 4 into %stack.0 + 204, align 256, addrspace 5) ; CHECK: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 ; CHECK: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK: G_STORE [[UV52]](s32), [[PTR_ADD54]](p5) :: (store 4 into %stack.0 + 208, align 256, addrspace 5) + ; CHECK: G_STORE [[UV52]](p3), [[PTR_ADD54]](p5) :: (store 4 into %stack.0 + 208, align 256, addrspace 5) ; CHECK: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 ; CHECK: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK: G_STORE [[UV53]](s32), [[PTR_ADD55]](p5) :: (store 4 into %stack.0 + 212, align 256, addrspace 5) + ; CHECK: G_STORE [[UV53]](p3), [[PTR_ADD55]](p5) :: (store 4 into 
%stack.0 + 212, align 256, addrspace 5) ; CHECK: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 ; CHECK: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK: G_STORE [[UV54]](s32), [[PTR_ADD56]](p5) :: (store 4 into %stack.0 + 216, align 256, addrspace 5) + ; CHECK: G_STORE [[UV54]](p3), [[PTR_ADD56]](p5) :: (store 4 into %stack.0 + 216, align 256, addrspace 5) ; CHECK: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 ; CHECK: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK: G_STORE [[UV55]](s32), [[PTR_ADD57]](p5) :: (store 4 into %stack.0 + 220, align 256, addrspace 5) + ; CHECK: G_STORE [[UV55]](p3), [[PTR_ADD57]](p5) :: (store 4 into %stack.0 + 220, align 256, addrspace 5) ; CHECK: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 ; CHECK: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK: G_STORE [[UV56]](s32), [[PTR_ADD58]](p5) :: (store 4 into %stack.0 + 224, align 256, addrspace 5) + ; CHECK: G_STORE [[UV56]](p3), [[PTR_ADD58]](p5) :: (store 4 into %stack.0 + 224, align 256, addrspace 5) ; CHECK: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 ; CHECK: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK: G_STORE [[UV57]](s32), [[PTR_ADD59]](p5) :: (store 4 into %stack.0 + 228, align 256, addrspace 5) + ; CHECK: G_STORE [[UV57]](p3), [[PTR_ADD59]](p5) :: (store 4 into %stack.0 + 228, align 256, addrspace 5) ; CHECK: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 ; CHECK: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK: G_STORE [[UV58]](s32), [[PTR_ADD60]](p5) :: (store 4 into %stack.0 + 232, align 256, addrspace 5) + ; CHECK: G_STORE [[UV58]](p3), [[PTR_ADD60]](p5) :: (store 4 into %stack.0 + 232, align 256, addrspace 5) ; CHECK: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 ; CHECK: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK: G_STORE [[UV59]](s32), [[PTR_ADD61]](p5) :: (store 4 into %stack.0 + 
236, align 256, addrspace 5) + ; CHECK: G_STORE [[UV59]](p3), [[PTR_ADD61]](p5) :: (store 4 into %stack.0 + 236, align 256, addrspace 5) ; CHECK: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 ; CHECK: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK: G_STORE [[UV60]](s32), [[PTR_ADD62]](p5) :: (store 4 into %stack.0 + 240, align 256, addrspace 5) + ; CHECK: G_STORE [[UV60]](p3), [[PTR_ADD62]](p5) :: (store 4 into %stack.0 + 240, align 256, addrspace 5) ; CHECK: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 ; CHECK: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) - ; CHECK: G_STORE [[UV61]](s32), [[PTR_ADD63]](p5) :: (store 4 into %stack.0 + 244, align 256, addrspace 5) + ; CHECK: G_STORE [[UV61]](p3), [[PTR_ADD63]](p5) :: (store 4 into %stack.0 + 244, align 256, addrspace 5) ; CHECK: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 ; CHECK: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK: G_STORE [[UV62]](s32), [[PTR_ADD64]](p5) :: (store 4 into %stack.0 + 248, align 256, addrspace 5) + ; CHECK: G_STORE [[UV62]](p3), [[PTR_ADD64]](p5) :: (store 4 into %stack.0 + 248, align 256, addrspace 5) ; CHECK: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 ; CHECK: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK: G_STORE [[UV63]](s32), [[PTR_ADD65]](p5) :: (store 4 into %stack.0 + 252, align 256, addrspace 5) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 4 from %stack.0 + 28, addrspace 5) - ; CHECK: S_ENDPGM 0, implicit [[LOAD4]](s32) + ; CHECK: G_STORE [[UV63]](p3), [[PTR_ADD65]](p5) :: (store 4 into %stack.0 + 252, align 256, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD35]](p5) :: (load 4 from %stack.0 + 132, addrspace 5) + ; CHECK: S_ENDPGM 0, implicit [[LOAD4]](p3) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_CONSTANT i32 7 - %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) - %3:_(s32) = 
G_EXTRACT_VECTOR_ELT %2, %1 + %1:_(s32) = G_CONSTANT i32 33 + %2:_(<64 x p3>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %3:_(p3) = G_EXTRACT_VECTOR_ELT %2, %1 S_ENDPGM 0, implicit %3 ...