diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -483,13 +483,6 @@ IsSGPR = false; IsAGPR = true; Width = 32; - } else { - assert((AMDGPU::TTMP_32RegClass.contains(Reg) || - AMDGPU::TTMP_64RegClass.contains(Reg) || - AMDGPU::TTMP_128RegClass.contains(Reg) || - AMDGPU::TTMP_256RegClass.contains(Reg) || - AMDGPU::TTMP_512RegClass.contains(Reg)) && - "Unknown register class"); } unsigned HWReg = TRI.getHWRegIndex(Reg); int MaxUsed = HWReg + Width - 1; diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-agpr-hi16.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-agpr-hi16.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/resource-usage-agpr-hi16.ll @@ -0,0 +1,1092 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -O3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s + +define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 %arg2, i1 %arg3, i1 %arg4, i1 %arg5, i1 %arg6, ptr addrspace(3) %arg7, ptr addrspace(3) %arg8, ptr addrspace(3) %arg9, ptr addrspace(3) %arg10) { +; GFX90A-LABEL: f1: +; GFX90A: ; %bb.0: ; %bb +; GFX90A-NEXT: s_add_u32 flat_scratch_lo, s12, s17 +; GFX90A-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 +; GFX90A-NEXT: s_add_u32 s0, s0, s17 +; GFX90A-NEXT: s_load_dwordx4 s[20:23], s[8:9], 0x18 +; GFX90A-NEXT: s_load_dwordx4 s[24:27], s[8:9], 0x0 +; GFX90A-NEXT: s_load_dwordx2 s[58:59], s[8:9], 0x10 +; GFX90A-NEXT: s_load_dword s33, s[8:9], 0x18 +; GFX90A-NEXT: s_addc_u32 s1, s1, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_bitcmp1_b32 s20, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, 0 +; GFX90A-NEXT: s_cselect_b64 s[12:13], -1, 0 +; GFX90A-NEXT: ds_read_b32 v2, v3 +; GFX90A-NEXT: s_xor_b64 s[28:29], s[12:13], -1 +; 
GFX90A-NEXT: s_bitcmp1_b32 s33, 8 +; GFX90A-NEXT: s_cselect_b64 s[18:19], -1, 0 +; GFX90A-NEXT: v_mov_b32_e32 v31, v0 +; GFX90A-NEXT: s_mov_b64 s[34:35], -1 +; GFX90A-NEXT: s_xor_b64 s[30:31], s[18:19], -1 +; GFX90A-NEXT: s_mov_b64 s[18:19], 0 +; GFX90A-NEXT: s_and_b64 vcc, exec, s[28:29] +; GFX90A-NEXT: s_mov_b32 s32, 0 +; GFX90A-NEXT: s_cbranch_vccz .LBB0_18 +; GFX90A-NEXT: ; %bb.1: ; %bb103 +; GFX90A-NEXT: s_mov_b64 s[34:35], 0 +; GFX90A-NEXT: s_and_b64 vcc, exec, s[30:31] +; GFX90A-NEXT: ; implicit-def: $vgpr24 +; GFX90A-NEXT: ; implicit-def: $agpr0 +; GFX90A-NEXT: ; implicit-def: $vgpr26_lo16 +; GFX90A-NEXT: ; implicit-def: $vgpr26_hi16 +; GFX90A-NEXT: ; implicit-def: $vgpr20 +; GFX90A-NEXT: ; implicit-def: $vgpr22 +; GFX90A-NEXT: ; implicit-def: $vgpr24_lo16 +; GFX90A-NEXT: ; implicit-def: $vgpr24_hi16 +; GFX90A-NEXT: ; implicit-def: $agpr0_lo16 +; GFX90A-NEXT: ; implicit-def: $agpr0_hi16 +; GFX90A-NEXT: ; implicit-def: $vgpr26 +; GFX90A-NEXT: ; implicit-def: $vgpr20_lo16 +; GFX90A-NEXT: ; implicit-def: $vgpr20_hi16 +; GFX90A-NEXT: ; implicit-def: $vgpr22_lo16 +; GFX90A-NEXT: ; implicit-def: $vgpr22_hi16 +; GFX90A-NEXT: s_cbranch_vccz .LBB0_18 +; GFX90A-NEXT: ; %bb.2: ; %bb105 +; GFX90A-NEXT: s_load_dword s17, s[8:9], 0x28 +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 +; GFX90A-NEXT: ds_read_b64 v[24:25], v0 +; GFX90A-NEXT: v_mov_b32_e32 v0, s23 +; GFX90A-NEXT: ds_read_b64 v[22:23], v0 +; GFX90A-NEXT: v_mov_b32_e32 v0, s21 +; GFX90A-NEXT: ds_read_b64 v[20:21], v0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v0, s17 +; GFX90A-NEXT: ds_read_b64 a[0:1], v0 +; GFX90A-NEXT: v_mov_b32_e32 v0, s22 +; GFX90A-NEXT: ds_read_b64 v[26:27], v0 +; GFX90A-NEXT: s_mov_b64 s[36:37], -1 +; GFX90A-NEXT: s_mov_b32 s20, 0 +; GFX90A-NEXT: s_mov_b32 s17, 0 +; GFX90A-NEXT: v_and_b32_e32 v4, 0x3ff, v31 +; GFX90A-NEXT: s_and_b64 vcc, exec, s[34:35] +; GFX90A-NEXT: s_cbranch_vccnz .LBB0_19 +; GFX90A-NEXT: .LBB0_3: +; GFX90A-NEXT: v_mov_b32_e32 v17, s20 +; GFX90A-NEXT: 
v_mov_b32_e32 v19, s17 +; GFX90A-NEXT: s_mov_b64 s[56:57], 0 +; GFX90A-NEXT: s_mov_b64 s[54:55], 0 +; GFX90A-NEXT: s_mov_b64 s[52:53], 0 +; GFX90A-NEXT: s_mov_b64 s[50:51], 0 +; GFX90A-NEXT: s_mov_b64 s[48:49], 0 +; GFX90A-NEXT: s_mov_b64 s[46:47], 0 +; GFX90A-NEXT: s_mov_b64 s[44:45], 0 +; GFX90A-NEXT: s_mov_b64 s[42:43], 0 +; GFX90A-NEXT: s_mov_b64 s[40:41], 0 +; GFX90A-NEXT: s_mov_b64 s[38:39], 0 +; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7 +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: ; implicit-def: $vgpr62_vgpr63 +; GFX90A-NEXT: ; implicit-def: $vgpr60_vgpr61 +; GFX90A-NEXT: ; implicit-def: $vgpr58_vgpr59 +; GFX90A-NEXT: ; implicit-def: $vgpr56_vgpr57 +; GFX90A-NEXT: ; implicit-def: $vgpr44_vgpr45 +; GFX90A-NEXT: ; implicit-def: $vgpr42_vgpr43 +; GFX90A-NEXT: ; implicit-def: $vgpr40_vgpr41 +; GFX90A-NEXT: ; implicit-def: $vgpr46_vgpr47 +; GFX90A-NEXT: v_mov_b32_e32 v16, s20 +; GFX90A-NEXT: v_mov_b32_e32 v30, s20 +; GFX90A-NEXT: v_mov_b32_e32 v18, s20 +; GFX90A-NEXT: v_mov_b32_e32 v54, s17 +; GFX90A-NEXT: v_mov_b32_e32 v15, s20 +; GFX90A-NEXT: v_mov_b32_e32 v14, s20 +; GFX90A-NEXT: s_mov_b64 s[34:35], 0 +; GFX90A-NEXT: s_mov_b64 s[58:59], 0 +; GFX90A-NEXT: s_and_saveexec_b64 s[24:25], s[36:37] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_44 +; GFX90A-NEXT: .LBB0_4: ; %Flow32 +; GFX90A-NEXT: s_or_b64 exec, exec, s[24:25] +; GFX90A-NEXT: s_and_saveexec_b64 s[12:13], s[18:19] +; GFX90A-NEXT: s_xor_b64 s[12:13], exec, s[12:13] +; GFX90A-NEXT: s_cbranch_execz .LBB0_55 +; GFX90A-NEXT: .LBB0_5: ; %bb89 +; GFX90A-NEXT: s_or_b64 s[58:59], s[58:59], exec +; GFX90A-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:4 +; GFX90A-NEXT: buffer_store_dword v10, off, s[0:3], 0 +; GFX90A-NEXT: s_or_b64 exec, exec, s[12:13] +; GFX90A-NEXT: s_and_saveexec_b64 s[12:13], s[56:57] +; GFX90A-NEXT: s_xor_b64 s[12:13], exec, s[12:13] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_56 +; 
GFX90A-NEXT: .LBB0_6: ; %Flow34 +; GFX90A-NEXT: s_or_b64 exec, exec, s[12:13] +; GFX90A-NEXT: s_and_saveexec_b64 s[12:13], s[54:55] +; GFX90A-NEXT: s_xor_b64 s[12:13], exec, s[12:13] +; GFX90A-NEXT: s_cbranch_execz .LBB0_57 +; GFX90A-NEXT: .LBB0_7: ; %bb79 +; GFX90A-NEXT: s_or_b64 s[58:59], s[58:59], exec +; GFX90A-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:4 +; GFX90A-NEXT: buffer_store_dword v6, off, s[0:3], 0 +; GFX90A-NEXT: s_or_b64 exec, exec, s[12:13] +; GFX90A-NEXT: s_and_saveexec_b64 s[12:13], s[52:53] +; GFX90A-NEXT: s_xor_b64 s[36:37], exec, s[12:13] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_58 +; GFX90A-NEXT: .LBB0_8: ; %Flow36 +; GFX90A-NEXT: s_or_b64 exec, exec, s[36:37] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[50:51] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB0_59 +; GFX90A-NEXT: .LBB0_9: ; %bb67 +; GFX90A-NEXT: s_or_b64 s[58:59], s[58:59], exec +; GFX90A-NEXT: buffer_store_dword v47, off, s[0:3], 0 offset:4 +; GFX90A-NEXT: buffer_store_dword v46, off, s[0:3], 0 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[48:49] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_60 +; GFX90A-NEXT: .LBB0_10: ; %Flow38 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[46:47] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB0_61 +; GFX90A-NEXT: .LBB0_11: ; %bb54 +; GFX90A-NEXT: s_or_b64 s[58:59], s[58:59], exec +; GFX90A-NEXT: buffer_store_dword v61, off, s[0:3], 0 offset:4 +; GFX90A-NEXT: buffer_store_dword v60, off, s[0:3], 0 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[44:45] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_62 +; GFX90A-NEXT: .LBB0_12: ; %Flow40 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[42:43] +; GFX90A-NEXT: s_xor_b64 s[4:5], 
exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB0_63 +; GFX90A-NEXT: .LBB0_13: ; %bb40 +; GFX90A-NEXT: s_or_b64 s[58:59], s[58:59], exec +; GFX90A-NEXT: buffer_store_dword v57, off, s[0:3], 0 offset:4 +; GFX90A-NEXT: buffer_store_dword v56, off, s[0:3], 0 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[40:41] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_64 +; GFX90A-NEXT: .LBB0_14: ; %Flow42 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[38:39] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB0_65 +; GFX90A-NEXT: .LBB0_15: ; %bb26 +; GFX90A-NEXT: buffer_store_dword v43, off, s[0:3], 0 offset:4 +; GFX90A-NEXT: buffer_store_dword v42, off, s[0:3], 0 +; GFX90A-NEXT: s_or_b64 s[58:59], s[58:59], exec +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[34:35] +; GFX90A-NEXT: s_cbranch_vccz .LBB0_66 +; GFX90A-NEXT: .LBB0_16: ; %Flow44 +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[58:59] +; GFX90A-NEXT: s_cbranch_execz .LBB0_67 +; GFX90A-NEXT: .LBB0_17: ; %UnifiedUnreachableBlock +; GFX90A-NEXT: ; divergent unreachable +; GFX90A-NEXT: s_endpgm +; GFX90A-NEXT: .LBB0_18: +; GFX90A-NEXT: s_mov_b64 s[36:37], 0 +; GFX90A-NEXT: ; implicit-def: $sgpr17 +; GFX90A-NEXT: ; implicit-def: $sgpr20 +; GFX90A-NEXT: ; implicit-def: $agpr1 +; GFX90A-NEXT: ; implicit-def: $vgpr21 +; GFX90A-NEXT: ; implicit-def: $vgpr23 +; GFX90A-NEXT: ; implicit-def: $vgpr25 +; GFX90A-NEXT: ; implicit-def: $vgpr27 +; GFX90A-NEXT: v_and_b32_e32 v4, 0x3ff, v31 +; GFX90A-NEXT: s_and_b64 vcc, exec, s[34:35] +; GFX90A-NEXT: s_cbranch_vccz .LBB0_3 +; GFX90A-NEXT: .LBB0_19: ; %bb15 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_lshlrev_b64 v[0:1], 2, v[2:3] +; GFX90A-NEXT: v_mov_b32_e32 v5, s25 +; GFX90A-NEXT: v_add_co_u32_e32 v46, vcc, s24, v0 +; GFX90A-NEXT: v_addc_co_u32_e32 v47, vcc, v5, v1, vcc +; GFX90A-NEXT: 
v_lshlrev_b32_e32 v0, 2, v4 +; GFX90A-NEXT: v_add_co_u32_e32 v40, vcc, v46, v0 +; GFX90A-NEXT: v_addc_co_u32_e32 v41, vcc, 0, v47, vcc +; GFX90A-NEXT: v_mov_b32_e32 v5, 0 +; GFX90A-NEXT: s_and_b64 vcc, exec, s[30:31] +; GFX90A-NEXT: s_cbranch_vccz .LBB0_33 +; GFX90A-NEXT: ; %bb.20: ; %bb20 +; GFX90A-NEXT: global_load_sbyte v0, v[40:41], off offset:1024 +; GFX90A-NEXT: v_add_co_u32_e32 v42, vcc, 0x400, v40 +; GFX90A-NEXT: v_addc_co_u32_e32 v43, vcc, 0, v41, vcc +; GFX90A-NEXT: s_mov_b64 s[34:35], 0 +; GFX90A-NEXT: s_mov_b64 s[38:39], -1 +; GFX90A-NEXT: s_mov_b64 s[56:57], 0 +; GFX90A-NEXT: s_mov_b64 s[54:55], 0 +; GFX90A-NEXT: s_mov_b64 s[52:53], 0 +; GFX90A-NEXT: s_mov_b64 s[50:51], 0 +; GFX90A-NEXT: s_mov_b64 s[48:49], 0 +; GFX90A-NEXT: s_mov_b64 s[46:47], 0 +; GFX90A-NEXT: s_mov_b64 s[44:45], 0 +; GFX90A-NEXT: s_mov_b64 s[42:43], 0 +; GFX90A-NEXT: s_mov_b64 s[40:41], 0 +; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7 +; GFX90A-NEXT: ; implicit-def: $vgpr62_vgpr63 +; GFX90A-NEXT: ; implicit-def: $vgpr60_vgpr61 +; GFX90A-NEXT: ; implicit-def: $vgpr58_vgpr59 +; GFX90A-NEXT: ; implicit-def: $vgpr56_vgpr57 +; GFX90A-NEXT: ; implicit-def: $vgpr44_vgpr45 +; GFX90A-NEXT: ; implicit-def: $vgpr19 +; GFX90A-NEXT: ; implicit-def: $vgpr17 +; GFX90A-NEXT: ; implicit-def: $vgpr16 +; GFX90A-NEXT: ; implicit-def: $vgpr30 +; GFX90A-NEXT: ; implicit-def: $vgpr18 +; GFX90A-NEXT: ; implicit-def: $vgpr54 +; GFX90A-NEXT: ; implicit-def: $vgpr15 +; GFX90A-NEXT: ; implicit-def: $agpr1 +; GFX90A-NEXT: ; implicit-def: $sgpr17 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_cmp_lt_i16_e32 vcc, 0, v0 +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: s_and_saveexec_b64 s[24:25], vcc +; GFX90A-NEXT: s_cbranch_execz .LBB0_42 +; GFX90A-NEXT: ; %bb.21: ; %bb27 +; GFX90A-NEXT: global_load_ubyte v0, v[40:41], off offset:2048 +; GFX90A-NEXT: v_add_co_u32_e32 v44, vcc, 0x800, v40 +; GFX90A-NEXT: 
v_addc_co_u32_e32 v45, vcc, 0, v41, vcc +; GFX90A-NEXT: s_mov_b64 s[40:41], -1 +; GFX90A-NEXT: s_mov_b64 s[60:61], s[36:37] +; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7 +; GFX90A-NEXT: ; implicit-def: $vgpr62_vgpr63 +; GFX90A-NEXT: ; implicit-def: $vgpr60_vgpr61 +; GFX90A-NEXT: ; implicit-def: $vgpr58_vgpr59 +; GFX90A-NEXT: ; implicit-def: $vgpr56_vgpr57 +; GFX90A-NEXT: ; implicit-def: $vgpr19 +; GFX90A-NEXT: ; implicit-def: $vgpr17 +; GFX90A-NEXT: ; implicit-def: $vgpr16 +; GFX90A-NEXT: ; implicit-def: $vgpr30 +; GFX90A-NEXT: ; implicit-def: $vgpr18 +; GFX90A-NEXT: ; implicit-def: $vgpr54 +; GFX90A-NEXT: ; implicit-def: $vgpr15 +; GFX90A-NEXT: ; implicit-def: $agpr1 +; GFX90A-NEXT: ; implicit-def: $sgpr17 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0 +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: s_and_saveexec_b64 s[38:39], vcc +; GFX90A-NEXT: s_cbranch_execz .LBB0_41 +; GFX90A-NEXT: ; %bb.22: ; %bb34 +; GFX90A-NEXT: global_load_ubyte v0, v[40:41], off offset:3072 +; GFX90A-NEXT: v_add_co_u32_e32 v56, vcc, 0xc00, v40 +; GFX90A-NEXT: v_addc_co_u32_e32 v57, vcc, 0, v41, vcc +; GFX90A-NEXT: s_mov_b64 s[42:43], -1 +; GFX90A-NEXT: s_mov_b64 s[60:61], s[36:37] +; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7 +; GFX90A-NEXT: ; implicit-def: $vgpr62_vgpr63 +; GFX90A-NEXT: ; implicit-def: $vgpr60_vgpr61 +; GFX90A-NEXT: ; implicit-def: $vgpr58_vgpr59 +; GFX90A-NEXT: ; implicit-def: $vgpr19 +; GFX90A-NEXT: ; implicit-def: $vgpr17 +; GFX90A-NEXT: ; implicit-def: $vgpr16 +; GFX90A-NEXT: ; implicit-def: $vgpr30 +; GFX90A-NEXT: ; implicit-def: $vgpr18 +; GFX90A-NEXT: ; implicit-def: $vgpr54 +; GFX90A-NEXT: ; implicit-def: $vgpr15 +; GFX90A-NEXT: ; implicit-def: $agpr1 +; GFX90A-NEXT: ; implicit-def: $sgpr17 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; 
GFX90A-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0 +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: s_and_saveexec_b64 s[40:41], vcc +; GFX90A-NEXT: s_cbranch_execz .LBB0_40 +; GFX90A-NEXT: ; %bb.23: ; %bb41 +; GFX90A-NEXT: v_add_co_u32_e32 v58, vcc, 0x1000, v40 +; GFX90A-NEXT: s_mov_b64 s[18:19], vcc +; GFX90A-NEXT: v_addc_co_u32_e64 v59, s[18:19], 0, v41, s[18:19] +; GFX90A-NEXT: global_load_ubyte v0, v[58:59], off +; GFX90A-NEXT: v_addc_co_u32_e32 v20, vcc, 0, v41, vcc +; GFX90A-NEXT: s_mov_b64 s[18:19], 0 +; GFX90A-NEXT: s_mov_b64 s[44:45], -1 +; GFX90A-NEXT: s_mov_b64 s[60:61], s[36:37] +; GFX90A-NEXT: s_mov_b64 s[62:63], 0 +; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7 +; GFX90A-NEXT: ; implicit-def: $vgpr62_vgpr63 +; GFX90A-NEXT: ; implicit-def: $vgpr60_vgpr61 +; GFX90A-NEXT: ; implicit-def: $vgpr19 +; GFX90A-NEXT: ; implicit-def: $vgpr17 +; GFX90A-NEXT: ; implicit-def: $vgpr16 +; GFX90A-NEXT: ; implicit-def: $vgpr30 +; GFX90A-NEXT: ; implicit-def: $vgpr18 +; GFX90A-NEXT: ; implicit-def: $vgpr54 +; GFX90A-NEXT: ; implicit-def: $vgpr15 +; GFX90A-NEXT: ; implicit-def: $agpr1 +; GFX90A-NEXT: ; implicit-def: $sgpr17 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0 +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: s_and_saveexec_b64 s[42:43], vcc +; GFX90A-NEXT: s_cbranch_execz .LBB0_39 +; GFX90A-NEXT: ; %bb.24: ; %bb48 +; GFX90A-NEXT: v_add_co_u32_e32 v60, vcc, 0x1400, v40 +; GFX90A-NEXT: s_mov_b64 s[18:19], vcc +; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v40 +; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v41, vcc +; GFX90A-NEXT: global_load_ubyte v0, v[0:1], off offset:1024 +; GFX90A-NEXT: v_addc_co_u32_e64 v61, vcc, 0, v41, s[18:19] +; GFX90A-NEXT: s_mov_b64 s[60:61], -1 +; GFX90A-NEXT: s_mov_b64 s[64:65], s[36:37] +; GFX90A-NEXT: s_mov_b64 s[66:67], 0 +; GFX90A-NEXT: s_mov_b64 s[68:69], 0 +; GFX90A-NEXT: s_mov_b64 s[70:71], 
0 +; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7 +; GFX90A-NEXT: ; implicit-def: $vgpr62_vgpr63 +; GFX90A-NEXT: ; implicit-def: $vgpr19 +; GFX90A-NEXT: ; implicit-def: $vgpr17 +; GFX90A-NEXT: ; implicit-def: $vgpr16 +; GFX90A-NEXT: ; implicit-def: $vgpr30 +; GFX90A-NEXT: ; implicit-def: $vgpr18 +; GFX90A-NEXT: ; implicit-def: $vgpr54 +; GFX90A-NEXT: ; implicit-def: $vgpr15 +; GFX90A-NEXT: ; implicit-def: $agpr1 +; GFX90A-NEXT: ; implicit-def: $sgpr17 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0 +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: s_and_saveexec_b64 s[18:19], vcc +; GFX90A-NEXT: s_cbranch_execz .LBB0_38 +; GFX90A-NEXT: ; %bb.25: ; %bb55 +; GFX90A-NEXT: s_bitcmp1_b32 s33, 16 +; GFX90A-NEXT: s_cselect_b64 s[64:65], -1, 0 +; GFX90A-NEXT: v_add_co_u32_e32 v62, vcc, 0x1800, v40 +; GFX90A-NEXT: s_xor_b64 s[48:49], s[64:65], -1 +; GFX90A-NEXT: v_addc_co_u32_e32 v63, vcc, 0, v41, vcc +; GFX90A-NEXT: s_and_b64 vcc, exec, s[48:49] +; GFX90A-NEXT: ; implicit-def: $agpr0 +; GFX90A-NEXT: ; implicit-def: $vgpr14 +; GFX90A-NEXT: ; implicit-def: $agpr0_lo16 +; GFX90A-NEXT: ; implicit-def: $agpr0_hi16 +; GFX90A-NEXT: ; implicit-def: $vgpr14_lo16 +; GFX90A-NEXT: ; implicit-def: $vgpr14_hi16 +; GFX90A-NEXT: s_cbranch_vccz .LBB0_35 +; GFX90A-NEXT: ; %bb.26: ; %bb63 +; GFX90A-NEXT: s_mov_b64 s[44:45], 0 +; GFX90A-NEXT: s_mov_b64 vcc, vcc +; GFX90A-NEXT: s_cbranch_vccz .LBB0_34 +; GFX90A-NEXT: ; %bb.27: ; %bb68 +; GFX90A-NEXT: v_lshlrev_b64 v[0:1], 3, v[4:5] +; GFX90A-NEXT: s_and_b64 vcc, exec, s[48:49] +; GFX90A-NEXT: s_cbranch_vccz .LBB0_68 +; GFX90A-NEXT: ; %bb.28: ; %bb73 +; GFX90A-NEXT: global_load_ubyte v5, v[0:1], off offset:2048 +; GFX90A-NEXT: v_add_co_u32_e32 v6, vcc, 0x800, v0 +; GFX90A-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc +; GFX90A-NEXT: s_mov_b64 s[48:49], 0 +; GFX90A-NEXT: s_mov_b64 s[54:55], -1 +; GFX90A-NEXT: s_mov_b64 
s[50:51], s[36:37] +; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX90A-NEXT: ; implicit-def: $vgpr19 +; GFX90A-NEXT: ; implicit-def: $vgpr17 +; GFX90A-NEXT: ; implicit-def: $vgpr16 +; GFX90A-NEXT: ; implicit-def: $vgpr30 +; GFX90A-NEXT: ; implicit-def: $vgpr18 +; GFX90A-NEXT: ; implicit-def: $vgpr54 +; GFX90A-NEXT: ; implicit-def: $vgpr15 +; GFX90A-NEXT: ; implicit-def: $agpr1 +; GFX90A-NEXT: ; implicit-def: $sgpr17 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_cmp_eq_u16_e32 vcc, 0, v5 +; GFX90A-NEXT: s_and_saveexec_b64 s[60:61], vcc +; GFX90A-NEXT: s_cbranch_execz .LBB0_69 +; GFX90A-NEXT: ; %bb.29: ; %bb80 +; GFX90A-NEXT: s_bfe_u32 s17, s33, 0x10018 +; GFX90A-NEXT: v_add_co_u32_e32 v8, vcc, 0x1000, v0 +; GFX90A-NEXT: s_cmp_eq_u32 s17, 0 +; GFX90A-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v1, vcc +; GFX90A-NEXT: s_cbranch_scc0 .LBB0_70 +; GFX90A-NEXT: ; %bb.30: ; %bb85 +; GFX90A-NEXT: v_or_b32_e32 v10, 1, v8 +; GFX90A-NEXT: v_mov_b32_e32 v11, v9 +; GFX90A-NEXT: flat_load_ubyte v5, v[10:11] +; GFX90A-NEXT: s_mov_b32 s17, 0 +; GFX90A-NEXT: s_mov_b64 s[50:51], -1 +; GFX90A-NEXT: s_mov_b64 s[62:63], s[36:37] +; GFX90A-NEXT: ; implicit-def: $vgpr19 +; GFX90A-NEXT: ; implicit-def: $vgpr17 +; GFX90A-NEXT: ; implicit-def: $vgpr16 +; GFX90A-NEXT: ; implicit-def: $vgpr30 +; GFX90A-NEXT: ; implicit-def: $vgpr18 +; GFX90A-NEXT: ; implicit-def: $vgpr54 +; GFX90A-NEXT: ; implicit-def: $vgpr15 +; GFX90A-NEXT: ; implicit-def: $vgpr13 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_cmp_eq_u16_e32 vcc, 0, v5 +; GFX90A-NEXT: s_and_saveexec_b64 s[52:53], vcc +; GFX90A-NEXT: s_cbranch_execz .LBB0_32 +; GFX90A-NEXT: ; %bb.31: ; %bb90 +; GFX90A-NEXT: v_mov_b32_e32 v5, 0 +; GFX90A-NEXT: ds_read_b64 v[16:17], v5 +; GFX90A-NEXT: v_mov_b32_e32 v5, s21 +; GFX90A-NEXT: ds_read_b64 v[18:19], v5 +; GFX90A-NEXT: v_mov_b32_e32 v5, s22 +; GFX90A-NEXT: ds_read_b64 v[14:15], v5 +; GFX90A-NEXT: v_mov_b32_e32 v5, s58 +; GFX90A-NEXT: 
v_cndmask_b32_e64 v54, 0, 1, s[64:65] +; GFX90A-NEXT: v_alignbit_b32 v13, s59, v5, 1 +; GFX90A-NEXT: s_waitcnt lgkmcnt(1) +; GFX90A-NEXT: v_alignbit_b32 v30, v19, v18, 1 +; GFX90A-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[12:13] +; GFX90A-NEXT: v_alignbit_b32 v17, v17, v16, 1 +; GFX90A-NEXT: s_xor_b64 s[50:51], exec, -1 +; GFX90A-NEXT: s_or_b64 s[62:63], s[36:37], exec +; GFX90A-NEXT: .LBB0_32: ; %Flow31 +; GFX90A-NEXT: s_or_b64 exec, exec, s[52:53] +; GFX90A-NEXT: v_mov_b32_e32 v12, v16 +; GFX90A-NEXT: v_accvgpr_write_b32 a0, v12 +; GFX90A-NEXT: s_mov_b64 s[52:53], 0 +; GFX90A-NEXT: v_accvgpr_write_b32 a1, v13 +; GFX90A-NEXT: s_branch .LBB0_71 +; GFX90A-NEXT: .LBB0_33: +; GFX90A-NEXT: s_mov_b64 s[34:35], -1 +; GFX90A-NEXT: s_mov_b64 s[56:57], 0 +; GFX90A-NEXT: s_mov_b64 s[54:55], 0 +; GFX90A-NEXT: s_mov_b64 s[52:53], 0 +; GFX90A-NEXT: s_mov_b64 s[50:51], 0 +; GFX90A-NEXT: s_mov_b64 s[48:49], 0 +; GFX90A-NEXT: s_mov_b64 s[46:47], 0 +; GFX90A-NEXT: s_mov_b64 s[44:45], 0 +; GFX90A-NEXT: s_mov_b64 s[42:43], 0 +; GFX90A-NEXT: s_mov_b64 s[40:41], 0 +; GFX90A-NEXT: s_mov_b64 s[38:39], 0 +; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7 +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: ; implicit-def: $vgpr62_vgpr63 +; GFX90A-NEXT: ; implicit-def: $vgpr60_vgpr61 +; GFX90A-NEXT: ; implicit-def: $vgpr58_vgpr59 +; GFX90A-NEXT: ; implicit-def: $vgpr56_vgpr57 +; GFX90A-NEXT: ; implicit-def: $vgpr44_vgpr45 +; GFX90A-NEXT: ; implicit-def: $vgpr42_vgpr43 +; GFX90A-NEXT: ; implicit-def: $vgpr19 +; GFX90A-NEXT: ; implicit-def: $vgpr17 +; GFX90A-NEXT: ; implicit-def: $vgpr16 +; GFX90A-NEXT: ; implicit-def: $vgpr30 +; GFX90A-NEXT: ; implicit-def: $vgpr18 +; GFX90A-NEXT: ; implicit-def: $vgpr54 +; GFX90A-NEXT: ; implicit-def: $vgpr15 +; GFX90A-NEXT: ; implicit-def: $agpr1 +; GFX90A-NEXT: ; implicit-def: $sgpr17 +; GFX90A-NEXT: s_branch .LBB0_43 +; GFX90A-NEXT: .LBB0_34: +; GFX90A-NEXT: 
s_mov_b64 s[46:47], -1 +; GFX90A-NEXT: .LBB0_35: +; GFX90A-NEXT: s_mov_b64 s[50:51], s[36:37] +; GFX90A-NEXT: s_mov_b64 s[48:49], 0 +; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7 +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: .LBB0_36: ; %Flow26 +; GFX90A-NEXT: ; implicit-def: $vgpr19 +; GFX90A-NEXT: ; implicit-def: $vgpr17 +; GFX90A-NEXT: ; implicit-def: $vgpr16 +; GFX90A-NEXT: ; implicit-def: $vgpr30 +; GFX90A-NEXT: ; implicit-def: $vgpr18 +; GFX90A-NEXT: ; implicit-def: $vgpr54 +; GFX90A-NEXT: ; implicit-def: $vgpr15 +; GFX90A-NEXT: ; implicit-def: $agpr1 +; GFX90A-NEXT: ; implicit-def: $sgpr17 +; GFX90A-NEXT: .LBB0_37: ; %Flow26 +; GFX90A-NEXT: s_and_b64 s[70:71], s[44:45], exec +; GFX90A-NEXT: s_and_b64 s[66:67], s[48:49], exec +; GFX90A-NEXT: s_andn2_b64 s[44:45], s[36:37], exec +; GFX90A-NEXT: s_and_b64 s[48:49], s[50:51], exec +; GFX90A-NEXT: s_xor_b64 s[60:61], exec, -1 +; GFX90A-NEXT: s_and_b64 s[68:69], s[46:47], exec +; GFX90A-NEXT: s_and_b64 s[54:55], s[54:55], exec +; GFX90A-NEXT: s_and_b64 s[56:57], s[56:57], exec +; GFX90A-NEXT: s_and_b64 s[46:47], s[52:53], exec +; GFX90A-NEXT: s_or_b64 s[64:65], s[44:45], s[48:49] +; GFX90A-NEXT: .LBB0_38: ; %Flow25 +; GFX90A-NEXT: s_or_b64 exec, exec, s[18:19] +; GFX90A-NEXT: s_and_b64 s[18:19], s[46:47], exec +; GFX90A-NEXT: s_andn2_b64 s[46:47], s[36:37], exec +; GFX90A-NEXT: s_and_b64 s[58:59], s[64:65], exec +; GFX90A-NEXT: s_xor_b64 s[44:45], exec, -1 +; GFX90A-NEXT: s_and_b64 s[62:63], s[60:61], exec +; GFX90A-NEXT: s_and_b64 s[48:49], s[70:71], exec +; GFX90A-NEXT: s_and_b64 s[50:51], s[68:69], exec +; GFX90A-NEXT: s_and_b64 s[52:53], s[66:67], exec +; GFX90A-NEXT: s_and_b64 s[54:55], s[54:55], exec +; GFX90A-NEXT: s_and_b64 s[56:57], s[56:57], exec +; GFX90A-NEXT: s_or_b64 s[60:61], s[46:47], s[58:59] +; GFX90A-NEXT: .LBB0_39: ; %Flow24 +; GFX90A-NEXT: s_or_b64 exec, exec, s[42:43] +; GFX90A-NEXT: 
s_andn2_b64 s[58:59], s[36:37], exec +; GFX90A-NEXT: s_and_b64 s[60:61], s[60:61], exec +; GFX90A-NEXT: v_mov_b32_e32 v59, v20 +; GFX90A-NEXT: s_xor_b64 s[42:43], exec, -1 +; GFX90A-NEXT: s_and_b64 s[44:45], s[44:45], exec +; GFX90A-NEXT: s_and_b64 s[46:47], s[62:63], exec +; GFX90A-NEXT: s_and_b64 s[48:49], s[48:49], exec +; GFX90A-NEXT: s_and_b64 s[50:51], s[50:51], exec +; GFX90A-NEXT: s_and_b64 s[52:53], s[52:53], exec +; GFX90A-NEXT: s_and_b64 s[54:55], s[54:55], exec +; GFX90A-NEXT: s_and_b64 s[56:57], s[56:57], exec +; GFX90A-NEXT: s_and_b64 s[18:19], s[18:19], exec +; GFX90A-NEXT: s_or_b64 s[60:61], s[58:59], s[60:61] +; GFX90A-NEXT: .LBB0_40: ; %Flow23 +; GFX90A-NEXT: s_or_b64 exec, exec, s[40:41] +; GFX90A-NEXT: s_andn2_b64 s[58:59], s[36:37], exec +; GFX90A-NEXT: s_and_b64 s[60:61], s[60:61], exec +; GFX90A-NEXT: s_xor_b64 s[40:41], exec, -1 +; GFX90A-NEXT: s_and_b64 s[42:43], s[42:43], exec +; GFX90A-NEXT: s_and_b64 s[44:45], s[44:45], exec +; GFX90A-NEXT: s_and_b64 s[46:47], s[46:47], exec +; GFX90A-NEXT: s_and_b64 s[48:49], s[48:49], exec +; GFX90A-NEXT: s_and_b64 s[50:51], s[50:51], exec +; GFX90A-NEXT: s_and_b64 s[52:53], s[52:53], exec +; GFX90A-NEXT: s_and_b64 s[54:55], s[54:55], exec +; GFX90A-NEXT: s_and_b64 s[56:57], s[56:57], exec +; GFX90A-NEXT: s_and_b64 s[18:19], s[18:19], exec +; GFX90A-NEXT: s_or_b64 s[60:61], s[58:59], s[60:61] +; GFX90A-NEXT: .LBB0_41: ; %Flow22 +; GFX90A-NEXT: s_or_b64 exec, exec, s[38:39] +; GFX90A-NEXT: s_andn2_b64 s[36:37], s[36:37], exec +; GFX90A-NEXT: s_and_b64 s[58:59], s[60:61], exec +; GFX90A-NEXT: s_xor_b64 s[38:39], exec, -1 +; GFX90A-NEXT: s_and_b64 s[40:41], s[40:41], exec +; GFX90A-NEXT: s_and_b64 s[42:43], s[42:43], exec +; GFX90A-NEXT: s_and_b64 s[44:45], s[44:45], exec +; GFX90A-NEXT: s_and_b64 s[46:47], s[46:47], exec +; GFX90A-NEXT: s_and_b64 s[48:49], s[48:49], exec +; GFX90A-NEXT: s_and_b64 s[50:51], s[50:51], exec +; GFX90A-NEXT: s_and_b64 s[52:53], s[52:53], exec +; GFX90A-NEXT: s_and_b64 
s[54:55], s[54:55], exec +; GFX90A-NEXT: s_and_b64 s[56:57], s[56:57], exec +; GFX90A-NEXT: s_and_b64 s[18:19], s[18:19], exec +; GFX90A-NEXT: s_or_b64 s[36:37], s[36:37], s[58:59] +; GFX90A-NEXT: .LBB0_42: ; %Flow20 +; GFX90A-NEXT: s_or_b64 exec, exec, s[24:25] +; GFX90A-NEXT: .LBB0_43: ; %Flow20 +; GFX90A-NEXT: v_mov_b32_e32 v21, s17 +; GFX90A-NEXT: v_mov_b32_e32 v20, s17 +; GFX90A-NEXT: v_mov_b32_e32 v23, s17 +; GFX90A-NEXT: v_mov_b32_e32 v22, s17 +; GFX90A-NEXT: v_mov_b32_e32 v25, s17 +; GFX90A-NEXT: v_mov_b32_e32 v24, s17 +; GFX90A-NEXT: v_mov_b32_e32 v27, s17 +; GFX90A-NEXT: v_mov_b32_e32 v26, s17 +; GFX90A-NEXT: s_mov_b64 s[58:59], 0 +; GFX90A-NEXT: s_and_saveexec_b64 s[24:25], s[36:37] +; GFX90A-NEXT: s_cbranch_execz .LBB0_4 +; GFX90A-NEXT: .LBB0_44: ; %bb140 +; GFX90A-NEXT: s_mov_b64 s[36:37], -1 +; GFX90A-NEXT: s_and_b64 vcc, exec, s[30:31] +; GFX90A-NEXT: s_cbranch_vccz .LBB0_50 +; GFX90A-NEXT: ; %bb.45: ; %bb174 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_or_b32_e32 v28, 1, v26 +; GFX90A-NEXT: v_or_b32_e32 v38, v28, v24 +; GFX90A-NEXT: v_or_b32_e32 v36, v38, v22 +; GFX90A-NEXT: v_cndmask_b32_e64 v32, v36, 0, s[12:13] +; GFX90A-NEXT: v_accvgpr_read_b32 v13, a1 +; GFX90A-NEXT: v_or_b32_e32 v50, v32, v20 +; GFX90A-NEXT: v_accvgpr_read_b32 v12, a0 +; GFX90A-NEXT: v_or_b32_e32 v48, v50, v12 +; GFX90A-NEXT: v_or_b32_e32 v34, v48, v14 +; GFX90A-NEXT: v_cndmask_b32_e64 v52, 0, v34, s[12:13] +; GFX90A-NEXT: s_mov_b64 s[12:13], -1 +; GFX90A-NEXT: s_and_b64 vcc, exec, s[28:29] +; GFX90A-NEXT: s_cbranch_vccz .LBB0_47 +; GFX90A-NEXT: ; %bb.46: ; %bb196 +; GFX90A-NEXT: v_or_b32_e32 v5, v52, v18 +; GFX90A-NEXT: v_or_b32_e32 v12, v5, v16 +; GFX90A-NEXT: v_mov_b32_e32 v13, 0 +; GFX90A-NEXT: ds_write_b64 v13, v[12:13] +; GFX90A-NEXT: s_mov_b64 s[12:13], 0 +; GFX90A-NEXT: .LBB0_47: ; %Flow +; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[12:13] +; GFX90A-NEXT: s_cbranch_vccnz .LBB0_49 +; GFX90A-NEXT: ; %bb.48: ; %bb186 +; GFX90A-NEXT: v_lshlrev_b64 v[2:3], 3, v[2:3] +; 
GFX90A-NEXT: v_mov_b32_e32 v5, s27 +; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, s26, v2 +; GFX90A-NEXT: v_mov_b32_e32 v29, 0 +; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v3, vcc +; GFX90A-NEXT: v_mov_b32_e32 v39, v29 +; GFX90A-NEXT: v_mov_b32_e32 v37, v29 +; GFX90A-NEXT: v_mov_b32_e32 v5, s21 +; GFX90A-NEXT: v_mov_b32_e32 v12, s22 +; GFX90A-NEXT: v_mov_b32_e32 v51, v29 +; GFX90A-NEXT: v_mov_b32_e32 v49, v29 +; GFX90A-NEXT: v_mov_b32_e32 v33, v29 +; GFX90A-NEXT: v_mov_b32_e32 v53, v29 +; GFX90A-NEXT: v_mov_b32_e32 v35, v29 +; GFX90A-NEXT: ds_write_b64 v29, v[28:29] +; GFX90A-NEXT: ds_write_b64 v5, v[38:39] +; GFX90A-NEXT: ds_write_b64 v12, v[36:37] +; GFX90A-NEXT: ds_write_b64 v29, v[50:51] +; GFX90A-NEXT: ds_write_b64 v5, v[48:49] +; GFX90A-NEXT: ds_write_b64 v29, v[32:33] +; GFX90A-NEXT: ds_write_b64 v5, v[52:53] +; GFX90A-NEXT: ds_write_b64 v29, v[34:35] +; GFX90A-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 +; GFX90A-NEXT: buffer_store_dword v2, off, s[0:3], 0 +; GFX90A-NEXT: .LBB0_49: ; %Flow9 +; GFX90A-NEXT: s_mov_b64 s[36:37], 0 +; GFX90A-NEXT: .LBB0_50: ; %Flow13 +; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[36:37] +; GFX90A-NEXT: s_cbranch_vccnz .LBB0_54 +; GFX90A-NEXT: ; %bb.51: ; %bb159 +; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GFX90A-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GFX90A-NEXT: s_xor_b64 s[12:13], exec, s[12:13] +; GFX90A-NEXT: s_cbranch_execz .LBB0_53 +; GFX90A-NEXT: ; %bb.52: ; %bb161 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_or_b32_e32 v2, v23, v25 +; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1 +; GFX90A-NEXT: v_or_b32_e32 v2, v2, v27 +; GFX90A-NEXT: v_or_b32_e32 v3, v3, v21 +; GFX90A-NEXT: v_or_b32_e32 v2, v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v3, 0 +; GFX90A-NEXT: v_cmp_eq_u16_sdwa vcc, v54, v3 src0_sel:BYTE_0 src1_sel:DWORD +; GFX90A-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX90A-NEXT: v_or_b32_e32 v4, v30, v15 +; GFX90A-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX90A-NEXT: v_cmp_eq_u16_sdwa vcc, v19, v3 src0_sel:BYTE_0 
src1_sel:DWORD +; GFX90A-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX90A-NEXT: v_or_b32_e32 v2, v2, v17 +; GFX90A-NEXT: ds_write2_b32 v3, v2, v3 offset1:1 +; GFX90A-NEXT: .LBB0_53: ; %Flow10 +; GFX90A-NEXT: s_andn2_saveexec_b64 s[12:13], s[12:13] +; GFX90A-NEXT: s_or_b64 exec, exec, s[12:13] +; GFX90A-NEXT: .LBB0_54: ; %Flow14 +; GFX90A-NEXT: s_mov_b64 s[58:59], exec +; GFX90A-NEXT: s_or_b64 exec, exec, s[24:25] +; GFX90A-NEXT: s_and_saveexec_b64 s[12:13], s[18:19] +; GFX90A-NEXT: s_xor_b64 s[12:13], exec, s[12:13] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_5 +; GFX90A-NEXT: .LBB0_55: ; %Flow33 +; GFX90A-NEXT: s_or_b64 exec, exec, s[12:13] +; GFX90A-NEXT: s_and_saveexec_b64 s[12:13], s[56:57] +; GFX90A-NEXT: s_xor_b64 s[12:13], exec, s[12:13] +; GFX90A-NEXT: s_cbranch_execz .LBB0_6 +; GFX90A-NEXT: .LBB0_56: ; %bb84 +; GFX90A-NEXT: s_or_b64 s[58:59], s[58:59], exec +; GFX90A-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:4 +; GFX90A-NEXT: buffer_store_dword v8, off, s[0:3], 0 +; GFX90A-NEXT: s_or_b64 exec, exec, s[12:13] +; GFX90A-NEXT: s_and_saveexec_b64 s[12:13], s[54:55] +; GFX90A-NEXT: s_xor_b64 s[12:13], exec, s[12:13] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_7 +; GFX90A-NEXT: .LBB0_57: ; %Flow35 +; GFX90A-NEXT: s_or_b64 exec, exec, s[12:13] +; GFX90A-NEXT: s_and_saveexec_b64 s[12:13], s[52:53] +; GFX90A-NEXT: s_xor_b64 s[36:37], exec, s[12:13] +; GFX90A-NEXT: s_cbranch_execz .LBB0_8 +; GFX90A-NEXT: .LBB0_58: ; %bb72 +; GFX90A-NEXT: s_add_u32 s8, s8, 48 +; GFX90A-NEXT: s_addc_u32 s9, s9, 0 +; GFX90A-NEXT: s_getpc_b64 s[12:13] +; GFX90A-NEXT: s_add_u32 s12, s12, f2@gotpcrel32@lo+4 +; GFX90A-NEXT: s_addc_u32 s13, s13, f2@gotpcrel32@hi+12 +; GFX90A-NEXT: s_load_dwordx2 s[18:19], s[12:13], 0x0 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX90A-NEXT: s_or_b64 s[58:59], s[58:59], exec +; GFX90A-NEXT: s_or_b64 exec, exec, 
s[36:37] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[50:51] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_9 +; GFX90A-NEXT: .LBB0_59: ; %Flow37 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[48:49] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB0_10 +; GFX90A-NEXT: .LBB0_60: ; %bb62 +; GFX90A-NEXT: s_or_b64 s[58:59], s[58:59], exec +; GFX90A-NEXT: buffer_store_dword v63, off, s[0:3], 0 offset:4 +; GFX90A-NEXT: buffer_store_dword v62, off, s[0:3], 0 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[46:47] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_11 +; GFX90A-NEXT: .LBB0_61: ; %Flow39 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[44:45] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB0_12 +; GFX90A-NEXT: .LBB0_62: ; %bb47 +; GFX90A-NEXT: s_or_b64 s[58:59], s[58:59], exec +; GFX90A-NEXT: buffer_store_dword v59, off, s[0:3], 0 offset:4 +; GFX90A-NEXT: buffer_store_dword v58, off, s[0:3], 0 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[42:43] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_13 +; GFX90A-NEXT: .LBB0_63: ; %Flow41 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[40:41] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB0_14 +; GFX90A-NEXT: .LBB0_64: ; %bb33 +; GFX90A-NEXT: s_or_b64 s[58:59], s[58:59], exec +; GFX90A-NEXT: buffer_store_dword v45, off, s[0:3], 0 offset:4 +; GFX90A-NEXT: buffer_store_dword v44, off, s[0:3], 0 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[38:39] +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_15 +; GFX90A-NEXT: .LBB0_65: 
; %Flow43 +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[34:35] +; GFX90A-NEXT: s_cbranch_vccnz .LBB0_16 +; GFX90A-NEXT: .LBB0_66: ; %bb19 +; GFX90A-NEXT: s_or_b64 s[58:59], s[58:59], exec +; GFX90A-NEXT: buffer_store_dword v41, off, s[0:3], 0 offset:4 +; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], 0 +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[58:59] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_17 +; GFX90A-NEXT: .LBB0_67: ; %UnifiedReturnBlock +; GFX90A-NEXT: s_endpgm +; GFX90A-NEXT: .LBB0_68: +; GFX90A-NEXT: s_mov_b64 s[48:49], -1 +; GFX90A-NEXT: s_mov_b64 s[50:51], s[36:37] +; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7 +; GFX90A-NEXT: s_branch .LBB0_36 +; GFX90A-NEXT: .LBB0_69: ; %Flow29 +; GFX90A-NEXT: s_or_b64 exec, exec, s[60:61] +; GFX90A-NEXT: s_branch .LBB0_37 +; GFX90A-NEXT: .LBB0_70: +; GFX90A-NEXT: s_mov_b64 s[50:51], 0 +; GFX90A-NEXT: s_mov_b64 s[52:53], -1 +; GFX90A-NEXT: s_mov_b64 s[62:63], s[36:37] +; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX90A-NEXT: ; implicit-def: $vgpr19 +; GFX90A-NEXT: ; implicit-def: $vgpr17 +; GFX90A-NEXT: ; implicit-def: $vgpr16 +; GFX90A-NEXT: ; implicit-def: $vgpr30 +; GFX90A-NEXT: ; implicit-def: $vgpr18 +; GFX90A-NEXT: ; implicit-def: $vgpr54 +; GFX90A-NEXT: ; implicit-def: $vgpr15 +; GFX90A-NEXT: ; implicit-def: $agpr1 +; GFX90A-NEXT: ; implicit-def: $sgpr17 +; GFX90A-NEXT: .LBB0_71: ; %Flow30 +; GFX90A-NEXT: s_and_b64 s[56:57], s[52:53], exec +; GFX90A-NEXT: s_and_b64 s[52:53], s[50:51], exec +; GFX90A-NEXT: s_andn2_b64 s[50:51], s[36:37], exec +; GFX90A-NEXT: s_and_b64 s[58:59], s[62:63], exec +; GFX90A-NEXT: s_xor_b64 s[54:55], exec, -1 +; GFX90A-NEXT: s_or_b64 s[50:51], s[50:51], s[58:59] +; GFX90A-NEXT: s_or_b64 exec, exec, s[60:61] +; GFX90A-NEXT: s_branch .LBB0_37 +bb: + %i = tail call i32 @llvm.amdgcn.workitem.id.x() + %i11 = icmp eq i32 %i, 0 + %i12 = load i32, ptr 
addrspace(3) null, align 8 + %i13 = zext i32 %i12 to i64 + %i14 = getelementptr i32, ptr addrspace(1) %arg, i64 %i13 + br i1 %arg3, label %bb15, label %bb103 + +bb15: + %i16 = zext i32 %i to i64 + %i17 = getelementptr i32, ptr addrspace(1) %i14, i64 %i16 + %i18 = ptrtoint ptr addrspace(1) %i17 to i64 + br i1 %arg4, label %bb19, label %bb20 + +bb19: + store i64 %i18, ptr addrspace(5) null, align 8 + unreachable + +bb20: + %i21 = getelementptr i32, ptr addrspace(1) %i17, i64 256 + %i22 = ptrtoint ptr addrspace(1) %i21 to i64 + %i23 = inttoptr i64 %i22 to ptr + %i24 = load i8, ptr %i23, align 1 + %i25 = icmp sge i8 0, %i24 + br i1 %i25, label %bb26, label %bb27 + +bb26: + store i64 %i22, ptr addrspace(5) null, align 8 + unreachable + +bb27: + %i28 = getelementptr i32, ptr addrspace(1) %i17, i64 512 + %i29 = ptrtoint ptr addrspace(1) %i28 to i64 + %i30 = inttoptr i64 %i29 to ptr + %i31 = load i8, ptr %i30, align 1 + %i32 = icmp ne i8 %i31, 0 + br i1 %i32, label %bb33, label %bb34 + +bb33: + store i64 %i29, ptr addrspace(5) null, align 8 + unreachable + +bb34: + %i35 = getelementptr i32, ptr addrspace(1) %i17, i64 768 + %i36 = ptrtoint ptr addrspace(1) %i35 to i64 + %i37 = inttoptr i64 %i36 to ptr + %i38 = load i8, ptr %i37, align 1 + %i39 = icmp ne i8 %i38, 0 + br i1 %i39, label %bb40, label %bb41 + +bb40: + store i64 %i36, ptr addrspace(5) null, align 8 + unreachable + +bb41: + %i42 = getelementptr i32, ptr addrspace(1) %i17, i64 1024 + %i43 = ptrtoint ptr addrspace(1) %i42 to i64 + %i44 = inttoptr i64 %i43 to ptr + %i45 = load i8, ptr %i44, align 1 + %i46 = icmp ne i8 %i45, 0 + br i1 %i46, label %bb47, label %bb48 + +bb47: + store i64 %i43, ptr addrspace(5) null, align 8 + unreachable + +bb48: + %i49 = getelementptr i32, ptr addrspace(1) %i17, i64 1280 + %i50 = ptrtoint ptr addrspace(1) %i49 to i64 + %i51 = inttoptr i64 %i50 to ptr + %i52 = load i8, ptr %i51, align 1 + %i53 = icmp ne i8 %i52, 0 + br i1 %i53, label %bb54, label %bb55 + +bb54: + store i64 %i50, ptr 
addrspace(5) null, align 8 + unreachable + +bb55: + %i56 = getelementptr i32, ptr addrspace(1) %i17, i64 1536 + %i57 = ptrtoint ptr addrspace(1) %i56 to i64 + %i58 = or i64 %i57, 1 + %i59 = inttoptr i64 %i58 to ptr + br i1 true, label %bb61, label %bb60 + +bb60: + br label %bb63 + +bb61: + br i1 %arg5, label %bb62, label %bb63 + +bb62: + store i64 %i57, ptr addrspace(5) null, align 8 + unreachable + +bb63: + %i64 = ptrtoint ptr addrspace(1) %i14 to i64 + br i1 true, label %bb66, label %bb65 + +bb65: + br label %bb68 + +bb66: + br i1 %arg5, label %bb67, label %bb68 + +bb67: + store i64 %i64, ptr addrspace(5) null, align 8 + unreachable + +bb68: + %i69 = zext i1 %arg5 to i8 + %i70 = getelementptr [2 x i32], ptr addrspace(1) null, i64 %i16 + %i71 = ptrtoint ptr addrspace(1) %i70 to i64 + br i1 %arg5, label %bb72, label %bb73 + +bb72: + call void @f2(i64 %i71) + unreachable + +bb73: + %i74 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 256 + %i75 = ptrtoint ptr addrspace(1) %i74 to i64 + %i76 = inttoptr i64 %i75 to ptr + %i77 = load i8, ptr %i76, align 1 + %i78 = icmp ne i8 %i77, 0 + br i1 %i78, label %bb79, label %bb80 + +bb79: + store i64 %i75, ptr addrspace(5) null, align 8 + unreachable + +bb80: + %i81 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 512 + %i82 = ptrtoint ptr addrspace(1) %i81 to i64 + %i83 = or i64 %i82, 1 + br i1 %arg6, label %bb84, label %bb85 + +bb84: + store i64 %i82, ptr addrspace(5) null, align 8 + unreachable + +bb85: + %i86 = inttoptr i64 %i83 to ptr + %i87 = load i8, ptr %i86, align 1 + %i88 = icmp ne i8 %i87, 0 + br i1 %i88, label %bb89, label %bb90 + +bb89: + store i64 %i83, ptr addrspace(5) null, align 8 + unreachable + +bb90: + %i91 = load i64, ptr addrspace(3) null, align 8 + %i92 = load i64, ptr addrspace(3) %arg8, align 8 + %i93 = load i64, ptr addrspace(3) %arg7, align 8 + %i94 = trunc i64 %i91 to i32 + %i95 = lshr i64 %arg2, 1 + %i96 = trunc i64 %i95 to i32 + %i97 = trunc i64 %i92 to i32 + %i98 = lshr i64 %i92, 32 + 
%i99 = trunc i64 %i98 to i32 + %i100 = trunc i64 %i93 to i32 + %i101 = lshr i64 %i93, 1 + %i102 = trunc i64 %i101 to i32 + br label %bb127 + +bb103: + br i1 %arg4, label %bb104, label %bb105 + +bb104: + ret void + +bb105: + %i106 = load i64, ptr addrspace(3) null, align 8 + %i107 = load i64, ptr addrspace(3) %arg9, align 8 + %i108 = load i64, ptr addrspace(3) %arg7, align 8 + %i109 = load i64, ptr addrspace(3) %arg10, align 8 + %i110 = load i64, ptr addrspace(3) %arg8, align 8 + %i111 = trunc i64 %i110 to i32 + %i112 = lshr i64 %i110, 32 + %i113 = trunc i64 %i112 to i32 + %i114 = trunc i64 %i106 to i32 + %i115 = lshr i64 %i106, 32 + %i116 = trunc i64 %i115 to i32 + %i117 = trunc i64 %i107 to i32 + %i118 = lshr i64 %i107, 32 + %i119 = trunc i64 %i118 to i32 + %i120 = trunc i64 %i108 to i32 + %i121 = lshr i64 %i108, 32 + %i122 = trunc i64 %i121 to i32 + %i123 = trunc i64 %i109 to i32 + %i124 = lshr i64 %i109, 32 + %i125 = trunc i64 %i124 to i32 + br i1 false, label %bb105.bb127_crit_edge, label %bb140 + +bb105.bb127_crit_edge: + br label %bb127 + +bb127: + %i128 = phi i32 [ %i94, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i129 = phi i32 [ %i96, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i130 = phi i32 [ %i97, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i131 = phi i32 [ %i99, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i132 = phi i8 [ %i69, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i133 = phi i32 [ %i100, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i134 = phi i32 [ %i102, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i135 = phi i64 [ %i91, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i136 = zext i1 %arg3 to i8 + %i137 = trunc i64 %i135 to i32 + %i138 = lshr i64 %i135, 1 + %i139 = trunc i64 %i138 to i32 + br label %bb140 + +bb140: + %i141 = phi i32 [ 0, %bb127 ], [ %i111, %bb105 ] + %i142 = phi i32 [ 0, %bb127 ], [ %i113, %bb105 ] + %i143 = phi i32 [ 0, %bb127 ], [ %i114, %bb105 ] + %i144 = phi i32 [ 0, %bb127 ], [ %i116, %bb105 ] + %i145 = phi i32 [ 0, %bb127 ], [ %i117, %bb105 ] + 
%i146 = phi i32 [ 0, %bb127 ], [ %i119, %bb105 ] + %i147 = phi i32 [ 0, %bb127 ], [ %i120, %bb105 ] + %i148 = phi i32 [ 0, %bb127 ], [ %i122, %bb105 ] + %i149 = phi i32 [ %i128, %bb127 ], [ %i123, %bb105 ] + %i150 = phi i32 [ %i129, %bb127 ], [ %i125, %bb105 ] + %i151 = phi i32 [ %i130, %bb127 ], [ 0, %bb105 ] + %i152 = phi i32 [ %i131, %bb127 ], [ 0, %bb105 ] + %i153 = phi i8 [ %i132, %bb127 ], [ 0, %bb105 ] + %i154 = phi i32 [ %i133, %bb127 ], [ 0, %bb105 ] + %i155 = phi i32 [ %i134, %bb127 ], [ 0, %bb105 ] + %i156 = phi i32 [ %i137, %bb127 ], [ 0, %bb105 ] + %i157 = phi i32 [ %i139, %bb127 ], [ 0, %bb105 ] + %i158 = phi i8 [ %i136, %bb127 ], [ 0, %bb105 ] + br i1 %arg4, label %bb159, label %bb174 + +bb159: + br i1 %i11, label %bb160, label %bb161 + +bb160: + unreachable + +bb161: + %i162 = or i32 %i146, %i144 + %i163 = or i32 %i162, %i142 + %i164 = or i32 %i150, %i148 + %i165 = or i32 %i164, %i163 + %i166 = icmp ne i8 %i153, 0 + %i167 = select i1 %i166, i32 0, i32 %i165 + %i168 = or i32 %i155, %i152 + %i169 = or i32 %i168, %i167 + %i170 = icmp ne i8 %i158, 0 + %i171 = select i1 %i170, i32 0, i32 %i169 + %i172 = or i32 %i171, %i157 + %i173 = zext i32 %i172 to i64 + store i64 %i173, ptr addrspace(3) null, align 4 + unreachable + +bb174: + %i175 = or i32 1, %i141 + %i176 = or i32 %i175, %i143 + %i177 = or i32 %i176, %i145 + %i178 = select i1 %arg3, i32 0, i32 %i177 + %i179 = or i32 %i178, %i147 + %i180 = or i32 %i179, %i149 + %i181 = or i32 %i180, %i151 + %i182 = select i1 %arg3, i32 %i181, i32 0 + %i183 = or i32 %i182, %i154 + %i184 = or i32 %i183, %i156 + %i185 = getelementptr [2 x i32], ptr addrspace(1) %arg1, i64 %i13 + br i1 %arg3, label %bb186, label %bb196 + +bb186: + %i187 = zext i32 %i175 to i64 + %i188 = zext i32 %i176 to i64 + %i189 = zext i32 %i177 to i64 + %i190 = zext i32 %i179 to i64 + %i191 = zext i32 %i180 to i64 + %i192 = zext i32 %i178 to i64 + %i193 = zext i32 %i182 to i64 + %i194 = zext i32 %i181 to i64 + store i64 %i187, ptr addrspace(3) null, 
align 8 + store i64 %i188, ptr addrspace(3) %arg7, align 8 + store i64 %i189, ptr addrspace(3) %arg8, align 8 + store i64 %i190, ptr addrspace(3) null, align 8 + store i64 %i191, ptr addrspace(3) %arg7, align 8 + store i64 %i192, ptr addrspace(3) null, align 8 + store i64 %i193, ptr addrspace(3) %arg7, align 8 + store i64 %i194, ptr addrspace(3) null, align 8 + %i195 = ptrtoint ptr addrspace(1) %i185 to i64 + store i64 %i195, ptr addrspace(5) null, align 8 + unreachable + +bb196: + %i197 = zext i32 %i184 to i64 + store i64 %i197, ptr addrspace(3) null, align 8 + unreachable +} + +declare void @f2(i64) + +declare i32 @llvm.amdgcn.workitem.id.x()