diff --git a/llvm/test/CodeGen/AMDGPU/swdev373493.ll b/llvm/test/CodeGen/AMDGPU/swdev373493.ll --- a/llvm/test/CodeGen/AMDGPU/swdev373493.ll +++ b/llvm/test/CodeGen/AMDGPU/swdev373493.ll @@ -1,12 +1,46 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -o - %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck %s +; RUN: llc -o - %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a -verify-machineinstrs | FileCheck %s @global = external protected addrspace(4) externally_initialized global [4096 x i64], align 16 -define hidden fastcc void @bar(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %arg5, ptr %arg6) unnamed_addr align 2 { +define hidden amdgpu_gfx void @bar(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %arg5, ptr %arg6, i32 inreg %arg7) unnamed_addr align 2 { ; CHECK-LABEL: bar: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s38, s33 +; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[34:35] +; CHECK-NEXT: v_writelane_b32 v40, s4, 0 +; CHECK-NEXT: v_writelane_b32 v40, s5, 1 +; CHECK-NEXT: v_writelane_b32 v40, s6, 2 +; CHECK-NEXT: v_writelane_b32 v40, s7, 3 +; CHECK-NEXT: v_writelane_b32 v40, s8, 4 +; CHECK-NEXT: v_writelane_b32 v40, s9, 5 +; CHECK-NEXT: v_writelane_b32 v40, s10, 6 +; CHECK-NEXT: v_writelane_b32 v40, s11, 7 +; CHECK-NEXT: v_writelane_b32 v40, s12, 8 +; CHECK-NEXT: v_writelane_b32 v40, s13, 9 +; CHECK-NEXT: v_writelane_b32 v40, s14, 10 +; CHECK-NEXT: v_writelane_b32 v40, s15, 11 +; CHECK-NEXT: v_writelane_b32 v40, s16, 12 +; CHECK-NEXT: v_writelane_b32 v40, s17, 13 +; CHECK-NEXT: v_writelane_b32 v40, s18, 14 +; CHECK-NEXT: v_writelane_b32 v40, s19, 15 +; CHECK-NEXT: v_writelane_b32 v40, s20, 16 +; CHECK-NEXT: v_writelane_b32 v40, s21, 17 +; CHECK-NEXT: v_writelane_b32 v40, s22, 18 +; CHECK-NEXT: v_writelane_b32 v40, s23, 19 +; CHECK-NEXT: v_writelane_b32 v40, s24, 20 +; CHECK-NEXT: v_writelane_b32 v40, s25, 21 +; CHECK-NEXT: v_writelane_b32 v40, s26, 22 +; CHECK-NEXT: v_writelane_b32 v40, s27, 23 +; CHECK-NEXT: v_writelane_b32 v40, s28, 24 +; CHECK-NEXT: v_writelane_b32 v40, s29, 25 +; CHECK-NEXT: s_addk_i32 s32, 0x400 +; CHECK-NEXT: v_writelane_b32 v40, s30, 26 +; CHECK-NEXT: v_writelane_b32 v40, s31, 27 ; CHECK-NEXT: v_mov_b32_e32 v15, v12 ; CHECK-NEXT: v_mov_b32_e32 v14, v11 ; CHECK-NEXT: v_mov_b32_e32 v13, v10 @@ -20,23 +54,28 @@ ; CHECK-NEXT: s_cmp_lt_i32 s4, 3 ; CHECK-NEXT: s_cbranch_scc0 .LBB0_3 ; CHECK-NEXT: ; %bb.1: ; %LeafBlock +; CHECK-NEXT: s_cmp_lg_u32 s4, 1 ; CHECK-NEXT: s_cbranch_scc1 .LBB0_5 ; CHECK-NEXT: ; %bb.2: ; %bb7 ; CHECK-NEXT: flat_load_dwordx2 v[2:3], v[0:1] -; CHECK-NEXT: s_getpc_b64 s[16:17] -; CHECK-NEXT: s_add_u32 s16, s16, global@rel32@lo+1948 -; CHECK-NEXT: s_addc_u32 s17, s17, global@rel32@hi+1956 +; CHECK-NEXT: s_getpc_b64 s[34:35] +; CHECK-NEXT: s_add_u32 s34, s34, global@rel32@lo+1948 +; CHECK-NEXT: s_addc_u32 s35, s35, global@rel32@hi+1956 +; CHECK-NEXT: s_mov_b64 s[8:9], 0 ; CHECK-NEXT: v_mov_b32_e32 v5, 0 -; CHECK-NEXT: v_mov_b32_e32 v0, s16 -; CHECK-NEXT: v_mov_b32_e32 v1, s17 -; CHECK-NEXT: s_getpc_b64 s[18:19] -; CHECK-NEXT: s_add_u32 s18, s18, eggs@rel32@lo+4 -; CHECK-NEXT: s_addc_u32 s19, s19, eggs@rel32@hi+12 -; CHECK-NEXT: s_setpc_b64 s[18:19] +; CHECK-NEXT: v_mov_b32_e32 v0, s34 +; CHECK-NEXT: v_mov_b32_e32 v1, s35 +; CHECK-NEXT: s_getpc_b64 s[36:37] +; CHECK-NEXT: s_add_u32 s36, s36, eggs@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s37, s37, eggs@rel32@hi+12 +; CHECK-NEXT: s_swappc_b64 s[30:31], s[36:37] +; CHECK-NEXT: s_branch .LBB0_5 ; CHECK-NEXT: .LBB0_3: ; %LeafBlock1 +; CHECK-NEXT: s_cmp_eq_u32 s4, 3 ; CHECK-NEXT: s_cbranch_scc0 .LBB0_5 ; CHECK-NEXT: ; %bb.4: ; %bb8 ; CHECK-NEXT: v_mov_b32_e32 v0, v1 +; CHECK-NEXT: s_mov_b64 s[8:9], 0 ; CHECK-NEXT: v_mov_b32_e32 v1, v2 ; CHECK-NEXT: v_mov_b32_e32 v2, v6 ; CHECK-NEXT: v_mov_b32_e32 v3, v7 @@ -48,14 +87,48 @@ ; CHECK-NEXT: v_mov_b32_e32 v9, v13 ; CHECK-NEXT: v_mov_b32_e32 v10, v14 ; CHECK-NEXT: v_mov_b32_e32 v11, v15 -; CHECK-NEXT: s_getpc_b64 s[16:17] -; CHECK-NEXT: s_add_u32 s16, s16, quux@rel32@lo+4 -; CHECK-NEXT: s_addc_u32 s17, s17, quux@rel32@hi+12 -; CHECK-NEXT: s_setpc_b64 s[16:17] +; CHECK-NEXT: s_getpc_b64 s[34:35] +; CHECK-NEXT: s_add_u32 s34, s34, quux@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s35, s35, quux@rel32@hi+12 +; CHECK-NEXT: s_swappc_b64 s[30:31], s[34:35] ; CHECK-NEXT: .LBB0_5: ; %bb9 +; CHECK-NEXT: v_readlane_b32 s31, v40, 27 +; CHECK-NEXT: v_readlane_b32 s30, v40, 26 +; CHECK-NEXT: v_readlane_b32 s29, v40, 25 +; CHECK-NEXT: v_readlane_b32 s28, v40, 24 +; CHECK-NEXT: v_readlane_b32 s27, v40, 23 +; CHECK-NEXT: v_readlane_b32 s26, v40, 22 +; CHECK-NEXT: v_readlane_b32 s25, v40, 21 +; CHECK-NEXT: v_readlane_b32 s24, v40, 20 +; CHECK-NEXT: v_readlane_b32 s23, v40, 19 +; CHECK-NEXT: v_readlane_b32 s22, v40, 18 +; CHECK-NEXT: v_readlane_b32 s21, v40, 17 +; CHECK-NEXT: v_readlane_b32 s20, v40, 16 +; CHECK-NEXT: v_readlane_b32 s19, v40, 15 +; CHECK-NEXT: v_readlane_b32 s18, v40, 14 +; CHECK-NEXT: v_readlane_b32 s17, v40, 13 +; CHECK-NEXT: v_readlane_b32 s16, v40, 12 +; CHECK-NEXT: v_readlane_b32 s15, v40, 11 +; CHECK-NEXT: v_readlane_b32 s14, v40, 10 +; CHECK-NEXT: v_readlane_b32 s13, v40, 9 +; CHECK-NEXT: v_readlane_b32 s12, v40, 8 +; CHECK-NEXT: v_readlane_b32 s11, v40, 7 +; CHECK-NEXT: v_readlane_b32 s10, v40, 6 +; CHECK-NEXT: v_readlane_b32 s9, v40, 5 +; CHECK-NEXT: v_readlane_b32 s8, v40, 4 +; CHECK-NEXT: v_readlane_b32 s7, v40, 3 +; CHECK-NEXT: v_readlane_b32 s6, v40, 2 +; CHECK-NEXT: v_readlane_b32 s5, v40, 1 +; CHECK-NEXT: v_readlane_b32 s4, v40, 0 +; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 +; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[34:35] +; CHECK-NEXT: s_addk_i32 s32, 0xfc00 +; CHECK-NEXT: s_mov_b32 s33, s38 +; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] bb: - switch i32 undef, label %bb9 [ + switch i32 %arg7, label %bb9 [ i32 3, label %bb8 i32 1, label %bb7 ]