Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -622,7 +622,7 @@ } else MaxNumAGPRs = 0; } - } else if (ST.hasMAIInsts() && MFI->usesAGPRs(MF)) { + } else if (ST.hasMAIInsts()) { // In order to guarantee copying between AGPRs, we need a scratch VGPR // available at all times. reserveRegisterTuples(Reserved, AMDGPU::VGPR32); Index: llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -254,59 +254,59 @@ ; GFX908-LABEL: no_agpr_no_reserve: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX908-NEXT: v_lshlrev_b32_e32 v32, 7, v0 +; GFX908-NEXT: v_lshlrev_b32_e32 v4, 7, v0 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) -; GFX908-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] offset:16 -; GFX908-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1] -; GFX908-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:48 -; GFX908-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:32 -; GFX908-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:80 -; GFX908-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:64 -; GFX908-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:112 -; GFX908-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:96 +; GFX908-NEXT: global_load_dwordx4 v[0:3], v4, s[0:1] offset:16 +; GFX908-NEXT: global_load_dwordx4 v[5:8], v4, s[0:1] +; GFX908-NEXT: global_load_dwordx4 v[9:12], v4, s[0:1] offset:48 +; GFX908-NEXT: global_load_dwordx4 v[13:16], v4, s[0:1] offset:32 +; GFX908-NEXT: global_load_dwordx4 v[17:20], v4, s[0:1] offset:80 +; GFX908-NEXT: global_load_dwordx4 v[21:24], v4, s[0:1] offset:64 +; GFX908-NEXT: global_load_dwordx4 v[25:28], v4, s[0:1] offset:112 +; GFX908-NEXT: global_load_dwordx4 v[33:36], v4, s[0:1] offset:96 ; GFX908-NEXT: s_waitcnt vmcnt(7) ; GFX908-NEXT: v_add_u32_e32 v3, v3, v3 ; GFX908-NEXT: v_add_u32_e32 v2, v2, v2 ; GFX908-NEXT: v_add_u32_e32 v1, v1, v1 ; GFX908-NEXT: v_add_u32_e32 v0, v0, v0 ; GFX908-NEXT: s_waitcnt vmcnt(6) +; GFX908-NEXT: v_add_u32_e32 v8, v8, v8 ; GFX908-NEXT: v_add_u32_e32 v7, v7, v7 ; GFX908-NEXT: v_add_u32_e32 v6, v6, v6 -; GFX908-NEXT: v_add_u32_e32 v5, v5, v5 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_add_u32_e32 v31, v31, v31 -; GFX908-NEXT: v_add_u32_e32 v30, v30, v30 -; GFX908-NEXT: v_add_u32_e32 v29, v29, v29 -; GFX908-NEXT: v_add_u32_e32 v28, v28, v28 -; GFX908-NEXT: v_add_u32_e32 v4, v4, v4 +; GFX908-NEXT: v_add_u32_e32 v36, v36, v36 +; GFX908-NEXT: v_add_u32_e32 v35, v35, v35 +; GFX908-NEXT: v_add_u32_e32 v34, v34, v34 +; GFX908-NEXT: v_add_u32_e32 v33, v33, v33 +; GFX908-NEXT: v_add_u32_e32 v5, v5, v5 +; GFX908-NEXT: v_add_u32_e32 v12, v12, v12 ; GFX908-NEXT: v_add_u32_e32 v11, v11, v11 ; GFX908-NEXT: v_add_u32_e32 v10, v10, v10 ; GFX908-NEXT: v_add_u32_e32 v9, v9, v9 -; GFX908-NEXT: v_add_u32_e32 v8, v8, v8 +; GFX908-NEXT: v_add_u32_e32 v16, v16, v16 ; GFX908-NEXT: v_add_u32_e32 v15, v15, v15 ; GFX908-NEXT: v_add_u32_e32 v14, v14, v14 ; GFX908-NEXT: v_add_u32_e32 v13, v13, v13 -; GFX908-NEXT: v_add_u32_e32 v12, v12, v12 +; GFX908-NEXT: v_add_u32_e32 v20, v20, v20 ; GFX908-NEXT: v_add_u32_e32 v19, v19, v19 ; GFX908-NEXT: v_add_u32_e32 v18, v18, v18 ; GFX908-NEXT: v_add_u32_e32 v17, v17, v17 -; GFX908-NEXT: v_add_u32_e32 v16, v16, v16 +; GFX908-NEXT: v_add_u32_e32 v24, v24, v24 ; GFX908-NEXT: v_add_u32_e32 v23, v23, v23 ; GFX908-NEXT: v_add_u32_e32 v22, v22, v22 ; GFX908-NEXT: v_add_u32_e32 v21, v21, v21 -; GFX908-NEXT: v_add_u32_e32 v20, v20, v20 +; GFX908-NEXT: v_add_u32_e32 v28, v28, v28 ; GFX908-NEXT: v_add_u32_e32 v27, v27, v27 ; GFX908-NEXT: v_add_u32_e32 v26, v26, v26 ; GFX908-NEXT: v_add_u32_e32 v25, v25, v25 -; GFX908-NEXT: v_add_u32_e32 v24, v24, v24 -; GFX908-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:96 -; GFX908-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:112 -; GFX908-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:64 -; GFX908-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:80 -; GFX908-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:32 -; GFX908-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:48 -; GFX908-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1] -; GFX908-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] offset:16 +; GFX908-NEXT: global_store_dwordx4 v4, v[33:36], s[0:1] offset:96 +; GFX908-NEXT: global_store_dwordx4 v4, v[25:28], s[0:1] offset:112 +; GFX908-NEXT: global_store_dwordx4 v4, v[21:24], s[0:1] offset:64 +; GFX908-NEXT: global_store_dwordx4 v4, v[17:20], s[0:1] offset:80 +; GFX908-NEXT: global_store_dwordx4 v4, v[13:16], s[0:1] offset:32 +; GFX908-NEXT: global_store_dwordx4 v4, v[9:12], s[0:1] offset:48 +; GFX908-NEXT: global_store_dwordx4 v4, v[5:8], s[0:1] +; GFX908-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 ; GFX908-NEXT: s_endpgm ; ; GFX90A-LABEL: no_agpr_no_reserve: @@ -518,9 +518,358 @@ ret void } +define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg2, i64 %arg3, <2 x half> %arg4, <2 x half> %arg5) #3 { +; GFX908-LABEL: introduced_copy_to_sgpr: +; GFX908: ; %bb.0: ; %bb +; GFX908-NEXT: global_load_ushort v0, v[0:1], off glc +; GFX908-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX908-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10 +; GFX908-NEXT: s_load_dword s7, s[4:5], 0x18 +; GFX908-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 +; GFX908-NEXT: s_mov_b32 s6, 0 +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: v_cvt_f32_u32_e32 v1, s1 +; GFX908-NEXT: s_sub_i32 s4, 0, s1 +; GFX908-NEXT: s_lshl_b64 s[10:11], s[2:3], 5 +; GFX908-NEXT: s_or_b32 s10, s10, 28 +; GFX908-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX908-NEXT: v_mov_b32_e32 v13, s10 +; GFX908-NEXT: s_lshr_b32 s12, s7, 16 +; GFX908-NEXT: v_mov_b32_e32 v32, s11 +; GFX908-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 +; GFX908-NEXT: v_cvt_u32_f32_e32 v2, v1 +; GFX908-NEXT: v_cvt_f32_f16_e32 v28, s7 +; GFX908-NEXT: v_cvt_f32_f16_e32 v29, s12 +; GFX908-NEXT: v_accvgpr_write_b32 a0, v13 +; GFX908-NEXT: v_mul_lo_u32 v1, s4, v2 +; GFX908-NEXT: v_accvgpr_write_b32 a1, v32 +; GFX908-NEXT: v_mov_b32_e32 v11, s3 +; GFX908-NEXT: s_lshl_b64 s[4:5], s[8:9], 5 +; GFX908-NEXT: v_mul_hi_u32 v3, v2, v1 +; GFX908-NEXT: v_mov_b32_e32 v1, 0 +; GFX908-NEXT: v_mov_b32_e32 v10, s2 +; GFX908-NEXT: v_add_u32_e32 v2, v2, v3 +; GFX908-NEXT: v_mul_hi_u32 v4, s0, v2 +; GFX908-NEXT: v_mul_lo_u32 v5, v4, s1 +; GFX908-NEXT: v_add_u32_e32 v6, 1, v4 +; GFX908-NEXT: v_sub_u32_e32 v5, s0, v5 +; GFX908-NEXT: v_cmp_le_u32_e32 vcc, s1, v5 +; GFX908-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX908-NEXT: v_subrev_u32_e32 v6, s1, v5 +; GFX908-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GFX908-NEXT: v_add_u32_e32 v7, 1, v4 +; GFX908-NEXT: v_cmp_le_u32_e32 vcc, s1, v5 +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_and_b32_e32 v30, 0xffff, v0 +; GFX908-NEXT: v_cndmask_b32_e32 v0, v4, v7, vcc +; GFX908-NEXT: v_mul_lo_u32 v8, s9, v30 +; GFX908-NEXT: v_mul_hi_u32 v9, s8, v30 +; GFX908-NEXT: v_lshlrev_b64 v[2:3], 5, v[0:1] +; GFX908-NEXT: v_mul_lo_u32 v6, s8, v30 +; GFX908-NEXT: v_add_u32_e32 v7, v9, v8 +; GFX908-NEXT: v_accvgpr_write_b32 a2, v2 +; GFX908-NEXT: v_accvgpr_write_b32 a3, v3 +; GFX908-NEXT: v_lshlrev_b64 v[6:7], 5, v[6:7] +; GFX908-NEXT: s_branch .LBB3_2 +; GFX908-NEXT: .LBB3_1: ; %bb12 +; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1 +; GFX908-NEXT: v_add_co_u32_e32 v10, vcc, v10, v0 +; GFX908-NEXT: s_nop 0 +; GFX908-NEXT: v_accvgpr_read_b32 v3, a1 +; GFX908-NEXT: v_accvgpr_read_b32 v5, a3 +; GFX908-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v11, vcc +; GFX908-NEXT: v_accvgpr_read_b32 v2, a0 +; GFX908-NEXT: v_accvgpr_read_b32 v4, a2 +; GFX908-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 +; GFX908-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc +; GFX908-NEXT: s_nop 0 +; GFX908-NEXT: v_accvgpr_write_b32 a0, v2 +; GFX908-NEXT: v_accvgpr_write_b32 a1, v3 +; GFX908-NEXT: .LBB3_2: ; %bb9 +; GFX908-NEXT: ; =>This Loop Header: Depth=1 +; GFX908-NEXT: ; Child Loop BB3_5 Depth 2 +; GFX908-NEXT: s_cbranch_scc0 .LBB3_1 +; GFX908-NEXT: ; %bb.3: ; %bb14 +; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1 +; GFX908-NEXT: v_mov_b32_e32 v2, 0 +; GFX908-NEXT: v_mov_b32_e32 v3, 0 +; GFX908-NEXT: global_load_dwordx2 v[12:13], v[2:3], off +; GFX908-NEXT: s_mov_b32 s7, s6 +; GFX908-NEXT: v_cmp_gt_i64_e64 s[0:1], 0, v[10:11] +; GFX908-NEXT: v_accvgpr_read_b32 v15, a1 +; GFX908-NEXT: v_mov_b32_e32 v17, s7 +; GFX908-NEXT: v_mov_b32_e32 v19, s7 +; GFX908-NEXT: v_accvgpr_read_b32 v14, a0 +; GFX908-NEXT: v_mov_b32_e32 v16, s6 +; GFX908-NEXT: v_mov_b32_e32 v18, s6 +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_add_co_u32_e32 v22, vcc, 1, v12 +; GFX908-NEXT: v_addc_co_u32_e32 v20, vcc, 0, v13, vcc +; GFX908-NEXT: v_mul_lo_u32 v23, s4, v20 +; GFX908-NEXT: v_mul_hi_u32 v24, s4, v22 +; GFX908-NEXT: v_mul_lo_u32 v25, s5, v22 +; GFX908-NEXT: v_mul_lo_u32 v31, s4, v22 +; GFX908-NEXT: v_mov_b32_e32 v21, s7 +; GFX908-NEXT: v_add_u32_e32 v22, v24, v23 +; GFX908-NEXT: v_add_u32_e32 v33, v22, v25 +; GFX908-NEXT: v_mov_b32_e32 v23, s7 +; GFX908-NEXT: v_mov_b32_e32 v20, s6 +; GFX908-NEXT: v_mov_b32_e32 v22, s6 +; GFX908-NEXT: s_branch .LBB3_5 +; GFX908-NEXT: .LBB3_4: ; %bb58 +; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2 +; GFX908-NEXT: v_add_co_u32_e32 v12, vcc, v12, v30 +; GFX908-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc +; GFX908-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[12:13] +; GFX908-NEXT: v_add_co_u32_e64 v14, s[2:3], v14, v6 +; GFX908-NEXT: v_addc_co_u32_e64 v15, s[2:3], v15, v7, s[2:3] +; GFX908-NEXT: s_and_b64 vcc, exec, vcc +; GFX908-NEXT: s_cbranch_vccz .LBB3_1 +; GFX908-NEXT: .LBB3_5: ; %bb16 +; GFX908-NEXT: ; Parent Loop BB3_2 Depth=1 +; GFX908-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX908-NEXT: v_add_co_u32_e32 v24, vcc, v14, v31 +; GFX908-NEXT: v_addc_co_u32_e32 v25, vcc, v15, v33, vcc +; GFX908-NEXT: global_load_dword v35, v[24:25], off offset:-12 glc +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: global_load_dword v34, v[24:25], off offset:-8 glc +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: global_load_dword v26, v[24:25], off offset:-4 glc +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: global_load_dword v24, v[24:25], off glc +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: ds_read_b64 v[24:25], v1 +; GFX908-NEXT: ds_read_b64 v[26:27], v0 +; GFX908-NEXT: s_and_b64 vcc, exec, s[0:1] +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: s_cbranch_vccnz .LBB3_4 +; GFX908-NEXT: ; %bb.6: ; %bb51 +; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2 +; GFX908-NEXT: v_cvt_f32_f16_sdwa v8, v35 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX908-NEXT: v_cvt_f32_f16_e32 v9, v35 +; GFX908-NEXT: v_cvt_f32_f16_sdwa v35, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX908-NEXT: v_cvt_f32_f16_e32 v34, v34 +; GFX908-NEXT: v_add_f32_e32 v4, v28, v24 +; GFX908-NEXT: v_add_f32_e32 v5, v29, v25 +; GFX908-NEXT: v_add_f32_e32 v2, 0, v24 +; GFX908-NEXT: v_add_f32_e32 v3, 0, v25 +; GFX908-NEXT: v_add_f32_e32 v8, v8, v27 +; GFX908-NEXT: v_add_f32_e32 v9, v9, v26 +; GFX908-NEXT: v_add_f32_e32 v25, v35, v25 +; GFX908-NEXT: v_add_f32_e32 v24, v34, v24 +; GFX908-NEXT: v_add_f32_e32 v17, v17, v5 +; GFX908-NEXT: v_add_f32_e32 v16, v16, v4 +; GFX908-NEXT: v_add_f32_e32 v19, v19, v3 +; GFX908-NEXT: v_add_f32_e32 v18, v18, v2 +; GFX908-NEXT: v_add_f32_e32 v20, v20, v9 +; GFX908-NEXT: v_add_f32_e32 v21, v21, v8 +; GFX908-NEXT: v_add_f32_e32 v22, v22, v24 +; GFX908-NEXT: v_add_f32_e32 v23, v23, v25 +; GFX908-NEXT: s_branch .LBB3_4 +; +; GFX90A-LABEL: introduced_copy_to_sgpr: +; GFX90A: ; %bb.0: ; %bb +; GFX90A-NEXT: global_load_ushort v10, v[0:1], off glc +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x8 +; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x10 +; GFX90A-NEXT: s_load_dword s2, s[4:5], 0x18 +; GFX90A-NEXT: v_mov_b32_e32 v1, 0 +; GFX90A-NEXT: s_mov_b32 s4, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s7 +; GFX90A-NEXT: s_sub_i32 s5, 0, s7 +; GFX90A-NEXT: s_lshr_b32 s12, s2, 16 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v2, s2 +; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v3, s12 +; GFX90A-NEXT: s_lshl_b64 s[10:11], s[8:9], 5 +; GFX90A-NEXT: s_or_b32 s10, s10, 28 +; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX90A-NEXT: s_lshl_b64 s[2:3], s[0:1], 5 +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[8:9], s[8:9] op_sel:[0,1] +; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[10:11], s[10:11] op_sel:[0,1] +; GFX90A-NEXT: v_mul_lo_u32 v8, s5, v0 +; GFX90A-NEXT: v_mul_hi_u32 v8, v0, v8 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v8 +; GFX90A-NEXT: v_mul_hi_u32 v0, s6, v0 +; GFX90A-NEXT: v_mul_lo_u32 v8, v0, s7 +; GFX90A-NEXT: v_sub_u32_e32 v8, s6, v8 +; GFX90A-NEXT: v_add_u32_e32 v9, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s7, v8 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v9, s7, v8 +; GFX90A-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc +; GFX90A-NEXT: v_add_u32_e32 v9, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s7, v8 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc +; GFX90A-NEXT: v_lshlrev_b64 v[8:9], 5, v[0:1] +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_and_b32_e32 v30, 0xffff, v10 +; GFX90A-NEXT: v_mul_lo_u32 v11, s1, v30 +; GFX90A-NEXT: v_mul_hi_u32 v12, s0, v30 +; GFX90A-NEXT: v_mul_lo_u32 v10, s0, v30 +; GFX90A-NEXT: v_add_u32_e32 v11, v12, v11 +; GFX90A-NEXT: v_lshlrev_b64 v[10:11], 5, v[10:11] +; GFX90A-NEXT: v_pk_mov_b32 v[12:13], 0, 0 +; GFX90A-NEXT: s_branch .LBB3_2 +; GFX90A-NEXT: .LBB3_1: ; %bb12 +; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1 +; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, v4, v0 +; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc +; GFX90A-NEXT: v_add_co_u32_e32 v6, vcc, v6, v8 +; GFX90A-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v9, vcc +; GFX90A-NEXT: .LBB3_2: ; %bb9 +; GFX90A-NEXT: ; =>This Loop Header: Depth=1 +; GFX90A-NEXT: ; Child Loop BB3_5 Depth 2 +; GFX90A-NEXT: s_cbranch_scc0 .LBB3_1 +; GFX90A-NEXT: ; %bb.3: ; %bb14 +; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1 +; GFX90A-NEXT: global_load_dwordx2 v[14:15], v[12:13], off +; GFX90A-NEXT: s_mov_b32 s5, s4 +; GFX90A-NEXT: v_cmp_gt_i64_e64 s[0:1], 0, v[4:5] +; GFX90A-NEXT: v_pk_mov_b32 v[16:17], v[6:7], v[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_pk_mov_b32 v[18:19], s[4:5], s[4:5] op_sel:[0,1] +; GFX90A-NEXT: v_pk_mov_b32 v[20:21], s[4:5], s[4:5] op_sel:[0,1] +; GFX90A-NEXT: v_pk_mov_b32 v[22:23], s[4:5], s[4:5] op_sel:[0,1] +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_add_co_u32_e32 v24, vcc, 1, v14 +; GFX90A-NEXT: v_addc_co_u32_e32 v25, vcc, 0, v15, vcc +; GFX90A-NEXT: v_mul_lo_u32 v25, s2, v25 +; GFX90A-NEXT: v_mul_hi_u32 v26, s2, v24 +; GFX90A-NEXT: v_mul_lo_u32 v27, s3, v24 +; GFX90A-NEXT: v_mul_lo_u32 v31, s2, v24 +; GFX90A-NEXT: v_add_u32_e32 v24, v26, v25 +; GFX90A-NEXT: v_add_u32_e32 v32, v24, v27 +; GFX90A-NEXT: v_pk_mov_b32 v[24:25], s[4:5], s[4:5] op_sel:[0,1] +; GFX90A-NEXT: s_branch .LBB3_5 +; GFX90A-NEXT: .LBB3_4: ; %bb58 +; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2 +; GFX90A-NEXT: v_add_co_u32_e32 v14, vcc, v14, v30 +; GFX90A-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v15, vcc +; GFX90A-NEXT: v_add_co_u32_e32 v16, vcc, v16, v10 +; GFX90A-NEXT: v_addc_co_u32_e32 v17, vcc, v17, v11, vcc +; GFX90A-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] +; GFX90A-NEXT: s_and_b64 vcc, exec, vcc +; GFX90A-NEXT: s_cbranch_vccz .LBB3_1 +; GFX90A-NEXT: .LBB3_5: ; %bb16 +; GFX90A-NEXT: ; Parent Loop BB3_2 Depth=1 +; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX90A-NEXT: v_add_co_u32_e32 v26, vcc, v16, v31 +; GFX90A-NEXT: v_addc_co_u32_e32 v27, vcc, v17, v32, vcc +; GFX90A-NEXT: global_load_dword v34, v[26:27], off offset:-12 glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: global_load_dword v33, v[26:27], off offset:-8 glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: global_load_dword v28, v[26:27], off offset:-4 glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: global_load_dword v28, v[26:27], off glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; kill: killed $vgpr26 killed $vgpr27 +; GFX90A-NEXT: ds_read_b64 v[26:27], v1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ds_read_b64 v[28:29], v0 +; GFX90A-NEXT: s_and_b64 vcc, exec, s[0:1] +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_cbranch_vccnz .LBB3_4 +; GFX90A-NEXT: ; %bb.6: ; %bb51 +; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2 +; GFX90A-NEXT: v_cvt_f32_f16_sdwa v35, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v34, v34 +; GFX90A-NEXT: v_cvt_f32_f16_sdwa v37, v33 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v36, v33 +; GFX90A-NEXT: v_pk_add_f32 v[38:39], v[2:3], v[26:27] +; GFX90A-NEXT: v_pk_add_f32 v[40:41], v[26:27], 0 op_sel_hi:[1,0] +; GFX90A-NEXT: v_pk_add_f32 v[28:29], v[34:35], v[28:29] +; GFX90A-NEXT: v_pk_add_f32 v[26:27], v[36:37], v[26:27] +; GFX90A-NEXT: v_pk_add_f32 v[18:19], v[18:19], v[38:39] +; GFX90A-NEXT: v_pk_add_f32 v[20:21], v[20:21], v[40:41] +; GFX90A-NEXT: v_pk_add_f32 v[22:23], v[22:23], v[28:29] +; GFX90A-NEXT: v_pk_add_f32 v[24:25], v[24:25], v[26:27] +; GFX90A-NEXT: s_branch .LBB3_4 +bb: + %i = load volatile i16, i16 addrspace(4)* undef, align 2 + %i6 = zext i16 %i to i64 + %i7 = udiv i32 %arg1, %arg2 + %i8 = zext i32 %i7 to i64 + br label %bb9 + +bb9: ; preds = %bb12, %bb + %i10 = phi i64 [ %arg3, %bb ], [ %i13, %bb12 ] + %i11 = icmp slt i64 %i10, 0 + br i1 undef, label %bb14, label %bb12 + +bb12: ; preds = %bb58, %bb9 + %i13 = add nuw nsw i64 %i10, %i8 + br label %bb9 + +bb14: ; preds = %bb9 + %i15 = load i64, i64 addrspace(1)* null, align 8 + br label %bb16 + +bb16: ; preds = %bb58, %bb14 + %i17 = phi i64 [ %i65, %bb58 ], [ %i15, %bb14 ] + %i18 = phi <2 x float> [ %i59, %bb58 ], [ zeroinitializer, %bb14 ] + %i19 = phi <2 x float> [ %i60, %bb58 ], [ zeroinitializer, %bb14 ] + %i20 = phi <2 x float> [ %i61, %bb58 ], [ zeroinitializer, %bb14 ] + %i21 = phi <2 x float> [ %i62, %bb58 ], [ zeroinitializer, %bb14 ] + %i22 = add nsw i64 %i17, 1 + %i23 = mul nsw i64 %i22, %arg + %i24 = add nsw i64 %i23, %i10 + %i25 = getelementptr inbounds [16 x half], [16 x half] addrspace(1)* null, i64 %i24, i64 8 + %i26 = bitcast half addrspace(1)* %i25 to <2 x half> addrspace(1)* + %i27 = load volatile <2 x half>, <2 x half> addrspace(1)* %i26, align 16 + %i28 = getelementptr inbounds [16 x half], [16 x half] addrspace(1)* null, i64 %i24, i64 10 + %i29 = bitcast half addrspace(1)* %i28 to <2 x half> addrspace(1)* + %i30 = load volatile <2 x half>, <2 x half> addrspace(1)* %i29, align 4 + %i31 = getelementptr inbounds [16 x half], [16 x half] addrspace(1)* null, i64 %i24, i64 12 + %i32 = bitcast half addrspace(1)* %i31 to <2 x half> addrspace(1)* + %i33 = load volatile <2 x half>, <2 x half> addrspace(1)* %i32, align 8 + %i34 = getelementptr inbounds [16 x half], [16 x half] addrspace(1)* null, i64 %i24, i64 14 + %i35 = bitcast half addrspace(1)* %i34 to <2 x half> addrspace(1)* + %i36 = load volatile <2 x half>, <2 x half> addrspace(1)* %i35, align 4 + %i37 = fpext <2 x half> %arg4 to <2 x float> + %i39 = fpext <2 x half> %i27 to <2 x float> + %i40 = fpext <2 x half> %i30 to <2 x float> + %i41 = fpext <2 x half> %i33 to <2 x float> + %i42 = fpext <2 x half> %i36 to <2 x float> + %i43 = load volatile <2 x float>, <2 x float> addrspace(3)* null, align 8 + %i44 = fadd contract <2 x float> %i37, %i43 + %i45 = fadd contract <2 x float> %i43, zeroinitializer + %i46 = load volatile <2 x float>, <2 x float> addrspace(3)* undef, align 32 + %i47 = fadd contract <2 x float> %i39, %i46 + %i48 = fadd contract <2 x float> %i40, %i43 + %i49 = fadd contract <2 x float> %i41, zeroinitializer + %i50 = fadd contract <2 x float> %i42, zeroinitializer + fence syncscope("workgroup") acquire + br i1 %i11, label %bb58, label %bb51 + +bb51: ; preds = %bb16 + %i52 = fadd contract <2 x float> %i18, %i44 + %i53 = fadd contract <2 x float> %i19, %i45 + %i54 = fadd contract <2 x float> %i20, %i47 + %i55 = fadd contract <2 x float> %i21, %i48 + %i56 = fadd contract <2 x float> %i49, zeroinitializer + %i57 = fadd contract <2 x float> %i50, zeroinitializer + br label %bb58 + +bb58: ; preds = %bb51, %bb16 + %i59 = phi <2 x float> [ %i18, %bb16 ], [ %i52, %bb51 ] + %i60 = phi <2 x float> [ %i19, %bb16 ], [ %i53, %bb51 ] + %i61 = phi <2 x float> [ %i20, %bb16 ], [ %i54, %bb51 ] + %i62 = phi <2 x float> [ %i21, %bb16 ], [ %i55, %bb51 ] + %i63 = phi <2 x float> [ zeroinitializer, %bb16 ], [ %i56, %bb51 ] + %i64 = phi <2 x float> [ zeroinitializer, %bb16 ], [ %i57, %bb51 ] + %i65 = add nsw i64 %i17, %i6 + %i66 = icmp slt i64 %i65, 0 + br i1 %i66, label %bb16, label %bb12 +} + declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1 declare i32 @llvm.amdgcn.workitem.id.x() #2 attributes #0 = { "amdgpu-waves-per-eu"="6,6" } attributes #1 = { convergent nounwind readnone willreturn } attributes #2 = { nounwind readnone willreturn } +attributes #3 = { "amdgpu-waves-per-eu"="7,7" } Index: llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir @@ -0,0 +1,471 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=greedy,0 -stop-after=postrapseudos -o - %s | FileCheck -check-prefix=GFX908 %s + +# This testcase has a long sequence of sgpr to vgpr copies with a lot of vgpr +# pressure, encouraging the allocator to displace some into AGPRs. This +# introduces SGPR to AGPR copies which require a reserved temporary VGPR to +# handle. + +--- | + + define amdgpu_kernel void @regalloc_introduces_s_to_a_copy() #0 { + ret void + } + + attributes #0 = { "amdgpu-waves-per-eu"="7,7" } + +... +--- +name: regalloc_introduces_s_to_a_copy +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 + + ; GFX908-LABEL: name: regalloc_introduces_s_to_a_copy + ; GFX908: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, $vgpr32_vgpr33_vgpr34_vgpr35, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr7 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr7, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr32 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr33 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr34 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr35 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: renamable $sgpr4 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr5 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr6 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr7 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr8 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr9 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr10 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr11 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr12 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr13 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr14 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr15 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr16 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr18 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr19 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr20 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr21 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr22 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr23 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr24 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr25 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr26 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr27 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr28 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr29 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr30 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr31 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr34 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr35 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr36 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr37 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr38 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr39 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: renamable $sgpr40 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 killed $sgpr4, implicit $exec + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr8, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr9, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr10, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr11, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr12, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr13, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr16, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr17, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr18, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr19, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr20, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr21, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr22, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr23, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, 0, implicit $exec :: (store (s32) into %stack.19, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr24, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, 0, implicit $exec :: (store (s32) into %stack.20, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr25, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, 0, implicit $exec :: (store (s32) into %stack.21, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr26, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, 0, implicit $exec :: (store (s32) into %stack.22, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr27, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, 0, implicit $exec :: (store (s32) into %stack.23, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr28, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, 0, implicit $exec :: (store (s32) into %stack.24, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr29, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, 0, implicit $exec :: (store (s32) into %stack.25, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr30, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, 0, implicit $exec :: (store (s32) into %stack.26, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr31, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, 0, implicit $exec :: (store (s32) into %stack.27, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, 0, implicit $exec :: (store (s32) into %stack.28, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr35, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, 0, implicit $exec :: (store (s32) into %stack.29, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr36, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, 0, implicit $exec :: (store (s32) into %stack.30, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr37, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, 0, implicit $exec :: (store (s32) into %stack.31, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr38, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, 0, implicit $exec :: (store (s32) into %stack.32, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr39, implicit $exec, implicit $exec + ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 136, 0, 0, 0, implicit $exec :: (store (s32) into %stack.33, addrspace 5) + ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr40, implicit $exec, implicit $exec + ; GFX908-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19, implicit $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, implicit $vgpr35 + ; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec, implicit $exec + ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec + ; GFX908-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GFX908-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; GFX908-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; GFX908-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) + ; GFX908-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) + ; GFX908-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5) + ; GFX908-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5) + ; GFX908-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) + ; GFX908-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5) + ; GFX908-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5) + ; GFX908-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5) + ; GFX908-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5) + ; GFX908-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5) + ; GFX908-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5) + ; GFX908-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5) + ; GFX908-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5) + ; GFX908-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5) + ; GFX908-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5) + ; GFX908-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, 0, implicit $exec :: (load (s32) from %stack.19, addrspace 5) + ; GFX908-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, 0, implicit $exec :: (load (s32) from %stack.20, addrspace 5) + ; GFX908-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, 0, implicit $exec :: (load (s32) from %stack.21, addrspace 5) + ; GFX908-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, 0, implicit $exec :: (load (s32) from %stack.22, addrspace 5) + ; GFX908-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, 0, implicit $exec :: (load (s32) from %stack.23, addrspace 5) + ; GFX908-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, 0, implicit $exec :: (load (s32) from %stack.24, addrspace 5) + ; GFX908-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, 0, implicit $exec :: (load (s32) from %stack.25, addrspace 5) + ; GFX908-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, 0, implicit $exec :: (load (s32) from %stack.26, addrspace 5) + ; GFX908-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, 0, implicit $exec :: (load (s32) from %stack.27, addrspace 5) + ; GFX908-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, 0, implicit $exec :: (load (s32) from %stack.28, addrspace 5) + ; GFX908-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, 0, implicit $exec :: (load (s32) from %stack.29, addrspace 5) + ; GFX908-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, 0, implicit $exec :: (load (s32) from %stack.30, addrspace 5) + ; GFX908-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, 0, implicit $exec :: (load (s32) from %stack.31, addrspace 5) + ; GFX908-NEXT: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, 0, implicit $exec :: (load (s32) from %stack.32, addrspace 5) + ; GFX908-NEXT: $vgpr35 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 136, 0, 0, 0, implicit $exec :: (load (s32) from %stack.33, addrspace 5) + ; GFX908-NEXT: S_NOP 0, implicit renamable $agpr0, implicit killed renamable $vgpr1, implicit killed renamable $vgpr2, implicit killed renamable $vgpr3, implicit killed renamable $vgpr4, implicit killed renamable $vgpr5, implicit killed renamable $vgpr6, implicit killed renamable $vgpr7, implicit killed renamable $vgpr8, implicit killed renamable $vgpr9, implicit killed renamable $vgpr10, implicit killed renamable $vgpr11, implicit killed renamable $vgpr12, implicit killed renamable $vgpr13, implicit killed renamable $vgpr14, implicit killed renamable $vgpr15, implicit killed renamable $vgpr16, implicit killed renamable $vgpr17, implicit killed renamable $vgpr18, implicit killed renamable $vgpr19, implicit killed renamable $vgpr20, implicit killed renamable $vgpr21, implicit killed renamable $vgpr22, implicit killed renamable $vgpr23, implicit killed renamable $vgpr24, implicit killed renamable $vgpr25, implicit killed renamable $vgpr26, implicit killed renamable $vgpr27, implicit killed renamable $vgpr28, implicit killed renamable $vgpr29, implicit killed renamable $vgpr30, implicit killed renamable $vgpr31, implicit killed renamable $vgpr33, implicit killed renamable $vgpr35, implicit killed renamable $vgpr34 + ; GFX908-NEXT: S_ENDPGM 0, implicit killed renamable $agpr0 + %v0:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v1:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v2:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v3:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v4:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v5:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v6:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v7:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v8:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v9:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v10:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v11:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v12:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v13:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v14:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v15:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v16:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v17:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v18:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v19:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v20:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v21:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v22:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v23:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v24:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v25:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v26:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v27:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v28:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v29:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v30:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v31:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v32:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v33:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v34:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %v35:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec + %s0:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s1:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s2:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s3:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s4:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s5:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s6:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s7:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s8:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s9:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s10:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s11:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s12:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s13:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s14:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s15:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s16:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s17:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s18:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s19:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s20:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s21:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s22:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s23:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s24:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s25:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s26:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s27:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s28:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s29:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s30:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s31:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s32:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s33:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %s34:sgpr_32 = S_LOAD_DWORD_IMM undef $sgpr8_sgpr9, 0, 0 + %71:vgpr_32 = COPY %s0 + %72:vgpr_32 = COPY %s1 + %73:vgpr_32 = COPY %s2 + %74:vgpr_32 = COPY %s3 + %75:vgpr_32 = COPY %s4 + %76:vgpr_32 = COPY %s5 + %77:vgpr_32 = COPY %s6 + %78:vgpr_32 = COPY %s7 + %79:vgpr_32 = COPY %s8 + %80:vgpr_32 = COPY %s9 + %81:vgpr_32 = COPY %s10 + %82:vgpr_32 = COPY %s11 + %83:vgpr_32 = COPY %s12 + %84:vgpr_32 = COPY %s13 + %85:vgpr_32 = COPY %s14 + %86:vgpr_32 = COPY %s15 + %87:vgpr_32 = COPY %s16 + %88:vgpr_32 = COPY %s17 + %89:vgpr_32 = COPY %s18 + %90:vgpr_32 = COPY %s19 + %91:vgpr_32 = COPY %s20 + %92:vgpr_32 = COPY %s21 + %93:vgpr_32 = COPY %s22 + %94:vgpr_32 = COPY %s23 + %95:vgpr_32 = COPY %s24 + %96:vgpr_32 = COPY %s25 + %97:vgpr_32 = COPY %s26 + %98:vgpr_32 = COPY %s27 + %99:vgpr_32 = COPY %s28 + %100:vgpr_32 = COPY %s29 + %101:vgpr_32 = COPY %s30 + %102:vgpr_32 = COPY %s31 + %103:vgpr_32 = COPY %s32 + %104:vgpr_32 = COPY %s33 + %105:vgpr_32 = COPY %s34 + S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19, implicit $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, implicit $vgpr35 + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v1, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v2, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v3, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v4, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v5, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v6, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v7, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v8, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v9, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v10, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v11, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v12, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v13, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v14, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v15, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v16, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v17, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v18, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v19, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v20, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v21, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v22, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v23, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v24, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v25, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v26, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v27, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v28, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v29, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v30, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v31, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v32, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v33, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v34, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %v35, 0, 0, implicit $exec + S_NOP 0, implicit %71, implicit %72, implicit %73, implicit %74, implicit %75, implicit %76, implicit %77, implicit %78, implicit %79, implicit %80, implicit %81, implicit %82, implicit %83, implicit %84, implicit %85, implicit %86, implicit %87, implicit %88, implicit %89, implicit %90, implicit %91, implicit %92, implicit %93, implicit %94, implicit %95, implicit %96, implicit %97, implicit %98, implicit %99, implicit %100, implicit %101, implicit %102, implicit %103, implicit %104, implicit %105 + S_ENDPGM 0, implicit %71 + +... Index: llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll +++ llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll @@ -194,11 +194,13 @@ ; GFX908-NOT: buffer_ ; GFX908-DAG: v_accvgpr_read_b32 -; GCN: NumVgprs: 256 +; GFX900: NumVgprs: 256 ; GFX900: ScratchSize: 148 +; GFX908: NumVgprs: 255 ; GFX908: ScratchSize: 0 ; GCN: VGPRBlocks: 63 -; GCN: NumVGPRsForWavesPerEU: 256 +; GFX900: NumVGPRsForWavesPerEU: 256 +; GFX908: NumVGPRsForWavesPerEU: 255 define amdgpu_kernel void @max_256_vgprs_spill_9x32(<32 x float> addrspace(1)* %p) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %p1 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p, i32 %tid @@ -242,11 +244,13 @@ ; GFX908-NOT: buffer_ ; GFX908-DAG: v_accvgpr_read_b32 -; GCN: NumVgprs: 256 +; GFX900: NumVgprs: 256 +; GFX908: NumVgprs: 253 ; GFX900: ScratchSize: 2052 ; GFX908: ScratchSize: 0 ; GCN: VGPRBlocks: 63 -; GCN: NumVGPRsForWavesPerEU: 256 +; GFX900: NumVGPRsForWavesPerEU: 256 +; GFX908: NumVGPRsForWavesPerEU: 253 define amdgpu_kernel void @max_256_vgprs_spill_9x32_2bb(<32 x float> addrspace(1)* %p) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %p1 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p, i32 %tid