Please use GitHub pull requests for new patches. Avoid migrating existing patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 5,577 Lines • ▼ Show 20 Lines | |||||
; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 | ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 | ||||
; GFX8-NEXT: s_setpc_b64 s[30:31] | ; GFX8-NEXT: s_setpc_b64 s[30:31] | ||||
; | ; | ||||
; GFX9-LABEL: v_fshl_i64_48: | ; GFX9-LABEL: v_fshl_i64_48: | ||||
; GFX9: ; %bb.0: | ; GFX9: ; %bb.0: | ||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||||
; GFX9-NEXT: v_mov_b32_e32 v4, v0 | ; GFX9-NEXT: v_mov_b32_e32 v4, v0 | ||||
; GFX9-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] | ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] | ||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v4 | ; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v1 | ||||
; GFX9-NEXT: v_or_b32_e32 v1, v2, v1 | |||||
; GFX9-NEXT: s_setpc_b64 s[30:31] | ; GFX9-NEXT: s_setpc_b64 s[30:31] | ||||
; | ; | ||||
; GFX10-LABEL: v_fshl_i64_48: | ; GFX10-LABEL: v_fshl_i64_48: | ||||
; GFX10: ; %bb.0: | ; GFX10: ; %bb.0: | ||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 | ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 | ||||
; GFX10-NEXT: v_mov_b32_e32 v4, v0 | ; GFX10-NEXT: v_mov_b32_e32 v4, v0 | ||||
; GFX10-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] | ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] | ||||
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v4 | ; GFX10-NEXT: v_lshl_or_b32 v1, v4, 16, v1 | ||||
; GFX10-NEXT: v_or_b32_e32 v1, v2, v1 | |||||
; GFX10-NEXT: s_setpc_b64 s[30:31] | ; GFX10-NEXT: s_setpc_b64 s[30:31] | ||||
; | ; | ||||
; GFX11-LABEL: v_fshl_i64_48: | ; GFX11-LABEL: v_fshl_i64_48: | ||||
; GFX11: ; %bb.0: | ; GFX11: ; %bb.0: | ||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||||
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 | ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 | ||||
; GFX11-NEXT: v_mov_b32_e32 v4, v0 | ; GFX11-NEXT: v_mov_b32_e32 v4, v0 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] | ; GFX11-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | ||||
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v4 | ; GFX11-NEXT: v_lshl_or_b32 v1, v4, 16, v1 | ||||
; GFX11-NEXT: v_or_b32_e32 v1, v2, v1 | |||||
; GFX11-NEXT: s_setpc_b64 s[30:31] | ; GFX11-NEXT: s_setpc_b64 s[30:31] | ||||
%result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 48) | %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 48) | ||||
ret i64 %result | ret i64 %result | ||||
} | } | ||||
define amdgpu_ps <2 x float> @v_fshl_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) { | define amdgpu_ps <2 x float> @v_fshl_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) { | ||||
; GFX6-LABEL: v_fshl_i64_ssv: | ; GFX6-LABEL: v_fshl_i64_ssv: | ||||
; GFX6: ; %bb.0: | ; GFX6: ; %bb.0: | ||||
▲ Show 20 Lines • Show All 736 Lines • ▼ Show 20 Lines | |||||
; GFX9-NEXT: v_lshlrev_b64 v[12:13], v14, v[0:1] | ; GFX9-NEXT: v_lshlrev_b64 v[12:13], v14, v[0:1] | ||||
; GFX9-NEXT: v_lshlrev_b64 v[0:1], v16, v[0:1] | ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v16, v[0:1] | ||||
; GFX9-NEXT: v_or_b32_e32 v8, v8, v10 | ; GFX9-NEXT: v_or_b32_e32 v8, v8, v10 | ||||
; GFX9-NEXT: v_or_b32_e32 v9, v9, v11 | ; GFX9-NEXT: v_or_b32_e32 v9, v9, v11 | ||||
; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 | ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v12, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v12, vcc | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v8, v1, v9, vcc | ||||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14 | ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14 | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v12, v0, v2, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v12, v0, v2, vcc | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v13, v1, v3, vcc | |||||
; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[4:5] | ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[4:5] | ||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 31, v6 | ; GFX9-NEXT: v_cndmask_b32_e32 v13, v8, v3, vcc | ||||
; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 | |||||
; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[6:7] | ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[6:7] | ||||
; GFX9-NEXT: v_lshl_or_b32 v1, v6, 31, v1 | |||||
; GFX9-NEXT: v_sub_u32_e32 v6, 64, v15 | ; GFX9-NEXT: v_sub_u32_e32 v6, 64, v15 | ||||
; GFX9-NEXT: v_subrev_u32_e32 v14, 64, v15 | ; GFX9-NEXT: v_subrev_u32_e32 v14, 64, v15 | ||||
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v15, v[0:1] | ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v15, v[0:1] | ||||
; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[2:3] | ; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[2:3] | ||||
; GFX9-NEXT: v_lshrrev_b64 v[8:9], v15, v[2:3] | ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v15, v[2:3] | ||||
; GFX9-NEXT: v_lshrrev_b64 v[2:3], v14, v[2:3] | ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v14, v[2:3] | ||||
; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 | ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 | ||||
; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 | ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 | ||||
Show All 13 Lines | |||||
; | ; | ||||
; GFX10-LABEL: v_fshl_i128: | ; GFX10-LABEL: v_fshl_i128: | ||||
; GFX10: ; %bb.0: | ; GFX10: ; %bb.0: | ||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 | ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 | ||||
; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v8 | ; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v8 | ||||
; GFX10-NEXT: v_xor_b32_e32 v8, -1, v8 | ; GFX10-NEXT: v_xor_b32_e32 v8, -1, v8 | ||||
; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] | ; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] | ||||
; GFX10-NEXT: v_lshlrev_b32_e32 v12, 31, v6 | ; GFX10-NEXT: v_lshrrev_b64 v[12:13], 1, v[6:7] | ||||
; GFX10-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7] | ; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v18 | ||||
; GFX10-NEXT: v_sub_nc_u32_e32 v9, 64, v18 | |||||
; GFX10-NEXT: v_and_b32_e32 v19, 0x7f, v8 | ; GFX10-NEXT: v_and_b32_e32 v19, 0x7f, v8 | ||||
; GFX10-NEXT: v_lshlrev_b64 v[10:11], v18, v[2:3] | ; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3] | ||||
; GFX10-NEXT: v_or_b32_e32 v5, v5, v12 | ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 31, v5 | ||||
; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v18 | ; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v18 | ||||
; GFX10-NEXT: v_lshrrev_b64 v[8:9], v9, v[0:1] | ; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1] | ||||
; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19 | ; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19 | ||||
; GFX10-NEXT: v_lshlrev_b64 v[12:13], v18, v[0:1] | ; GFX10-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1] | ||||
; GFX10-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5] | ; GFX10-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5] | ||||
; GFX10-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] | ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] | ||||
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 | ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 | ||||
; GFX10-NEXT: v_or_b32_e32 v10, v8, v10 | ; GFX10-NEXT: v_or_b32_e32 v10, v10, v8 | ||||
; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v19 | ; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v19 | ||||
; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7] | ; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[12:13] | ||||
; GFX10-NEXT: v_or_b32_e32 v11, v9, v11 | ; GFX10-NEXT: v_or_b32_e32 v11, v11, v9 | ||||
; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19 | ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19 | ||||
; GFX10-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo | ; GFX10-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo | ||||
; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[6:7] | ; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[12:13] | ||||
; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v19 | ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v19 | ||||
; GFX10-NEXT: v_or_b32_e32 v14, v14, v16 | ; GFX10-NEXT: v_or_b32_e32 v14, v14, v16 | ||||
; GFX10-NEXT: v_or_b32_e32 v15, v15, v17 | ; GFX10-NEXT: v_or_b32_e32 v15, v15, v17 | ||||
; GFX10-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo | ; GFX10-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo | ||||
; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7] | ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[12:13] | ||||
; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v18 | ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v18 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v14, s4 | ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v14, s4 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v6, v9, v15, s4 | ; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v15, s4 | ||||
; GFX10-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc_lo | ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo | ||||
; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v13, vcc_lo | ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc_lo | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s6 | ; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s6 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s6 | ; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s6 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5 | ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v5, v6, v5, s5 | ; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v5, s5 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v0, s4 | ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v0, s4 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v1, s4 | ; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v1, s4 | ||||
; GFX10-NEXT: v_or_b32_e32 v0, v12, v4 | ; GFX10-NEXT: v_or_b32_e32 v0, v6, v4 | ||||
; GFX10-NEXT: v_or_b32_e32 v1, v7, v5 | ; GFX10-NEXT: v_or_b32_e32 v1, v7, v5 | ||||
; GFX10-NEXT: v_or_b32_e32 v2, v2, v6 | ; GFX10-NEXT: v_or_b32_e32 v2, v2, v8 | ||||
; GFX10-NEXT: v_or_b32_e32 v3, v3, v8 | ; GFX10-NEXT: v_or_b32_e32 v3, v3, v9 | ||||
; GFX10-NEXT: s_setpc_b64 s[30:31] | ; GFX10-NEXT: s_setpc_b64 s[30:31] | ||||
; | ; | ||||
; GFX11-LABEL: v_fshl_i128: | ; GFX11-LABEL: v_fshl_i128: | ||||
; GFX11: ; %bb.0: | ; GFX11: ; %bb.0: | ||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||||
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 | ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] | |||||
; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v8 | ; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v8 | ||||
; GFX11-NEXT: v_xor_b32_e32 v8, -1, v8 | ; GFX11-NEXT: v_xor_b32_e32 v8, -1, v8 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] | ; GFX11-NEXT: v_lshrrev_b64 v[12:13], 1, v[6:7] | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | ||||
; GFX11-NEXT: v_sub_nc_u32_e32 v9, 64, v18 | ; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v18 | ||||
; GFX11-NEXT: v_lshlrev_b64 v[10:11], v18, v[2:3] | ; GFX11-NEXT: v_lshl_or_b32 v5, v6, 31, v5 | ||||
; GFX11-NEXT: v_subrev_nc_u32_e32 v20, 64, v18 | ; GFX11-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1] | ||||
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 | ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 | ||||
; GFX11-NEXT: v_lshlrev_b32_e32 v12, 31, v6 | |||||
; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v8 | ; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v8 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[8:9], v9, v[0:1] | ; GFX11-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3] | ||||
; GFX11-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7] | ; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1] | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) | ; GFX11-NEXT: v_subrev_nc_u32_e32 v20, 64, v18 | ||||
; GFX11-NEXT: v_or_b32_e32 v5, v5, v12 | ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo | ||||
; GFX11-NEXT: v_lshlrev_b64 v[12:13], v18, v[0:1] | |||||
; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] | |||||
; GFX11-NEXT: v_or_b32_e32 v10, v8, v10 | |||||
; GFX11-NEXT: v_or_b32_e32 v11, v9, v11 | |||||
; GFX11-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc_lo | |||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | |||||
; GFX11-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo | |||||
; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19 | ; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19 | ||||
; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v19 | |||||
; GFX11-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5] | ; GFX11-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5] | ||||
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19 | ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19 | ||||
; GFX11-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo | ; GFX11-NEXT: v_or_b32_e32 v10, v10, v8 | ||||
; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7] | ; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v19 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[6:7] | ; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[12:13] | ||||
; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7] | ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] | ||||
; GFX11-NEXT: v_or_b32_e32 v11, v11, v9 | |||||
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19 | ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19 | ||||
; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18 | ; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[12:13] | ||||
; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v13, vcc_lo | ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc_lo | ||||
; GFX11-NEXT: v_or_b32_e32 v14, v14, v16 | ; GFX11-NEXT: v_or_b32_e32 v14, v14, v16 | ||||
; GFX11-NEXT: v_or_b32_e32 v15, v15, v17 | ; GFX11-NEXT: v_or_b32_e32 v15, v15, v17 | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) | ; GFX11-NEXT: v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2 | ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[12:13] | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2 | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3) | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v14, s0 | ; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v14, s0 | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) | ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v6, v9, v15, s0 | ; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v15, s0 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1 | ; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1 | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4) | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v5, v6, v5, s1 | ; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v0, s0 | ; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v1, s0 | ; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, v5, s1 | ||||
; GFX11-NEXT: v_or_b32_e32 v0, v12, v4 | ; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v0, s0 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v1, s0 | |||||
; GFX11-NEXT: v_or_b32_e32 v0, v6, v4 | |||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | ||||
; GFX11-NEXT: v_or_b32_e32 v1, v7, v5 | ; GFX11-NEXT: v_or_b32_e32 v1, v7, v5 | ||||
; GFX11-NEXT: v_or_b32_e32 v2, v2, v6 | ; GFX11-NEXT: v_or_b32_e32 v2, v2, v8 | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | ||||
; GFX11-NEXT: v_or_b32_e32 v3, v3, v8 | ; GFX11-NEXT: v_or_b32_e32 v3, v3, v9 | ||||
; GFX11-NEXT: s_setpc_b64 s[30:31] | ; GFX11-NEXT: s_setpc_b64 s[30:31] | ||||
%result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) | %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) | ||||
ret i128 %result | ret i128 %result | ||||
} | } | ||||
define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) { | define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) { | ||||
; GFX6-LABEL: v_fshl_i128_ssv: | ; GFX6-LABEL: v_fshl_i128_ssv: | ||||
; GFX6: ; %bb.0: | ; GFX6: ; %bb.0: | ||||
▲ Show 20 Lines • Show All 383 Lines • ▼ Show 20 Lines | |||||
; GFX9-NEXT: s_cmp_eq_u32 s8, 0 | ; GFX9-NEXT: s_cmp_eq_u32 s8, 0 | ||||
; GFX9-NEXT: s_cselect_b32 s13, 1, 0 | ; GFX9-NEXT: s_cselect_b32 s13, 1, 0 | ||||
; GFX9-NEXT: s_lshl_b64 s[6:7], s[0:1], s8 | ; GFX9-NEXT: s_lshl_b64 s[6:7], s[0:1], s8 | ||||
; GFX9-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 | ; GFX9-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 | ||||
; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 | ; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 | ||||
; GFX9-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] | ; GFX9-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] | ||||
; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 | ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 | ||||
; GFX9-NEXT: s_cmp_lg_u32 s12, 0 | ; GFX9-NEXT: s_cmp_lg_u32 s12, 0 | ||||
; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] | |||||
; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 | ; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 | ||||
; GFX9-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] | ; GFX9-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] | ||||
; GFX9-NEXT: s_cmp_lg_u32 s13, 0 | ; GFX9-NEXT: s_cmp_lg_u32 s13, 0 | ||||
; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] | |||||
; GFX9-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] | ; GFX9-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] | ||||
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 31, v2 | ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 31, v1 | ||||
; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] | ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] | ||||
; GFX9-NEXT: s_sub_i32 s2, s4, 64 | ; GFX9-NEXT: s_sub_i32 s2, s4, 64 | ||||
; GFX9-NEXT: s_sub_i32 s3, 64, s4 | ; GFX9-NEXT: s_sub_i32 s3, 64, s4 | ||||
; GFX9-NEXT: v_or_b32_e32 v1, v1, v4 | |||||
; GFX9-NEXT: s_cmp_lt_u32 s4, 64 | ; GFX9-NEXT: s_cmp_lt_u32 s4, 64 | ||||
; GFX9-NEXT: s_cselect_b32 s5, 1, 0 | ; GFX9-NEXT: s_cselect_b32 s5, 1, 0 | ||||
; GFX9-NEXT: s_cmp_eq_u32 s4, 0 | ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 | ||||
; GFX9-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] | ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] | ||||
; GFX9-NEXT: v_lshlrev_b64 v[6:7], s3, v[2:3] | ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s3, v[2:3] | ||||
; GFX9-NEXT: s_cselect_b32 s8, 1, 0 | ; GFX9-NEXT: s_cselect_b32 s8, 1, 0 | ||||
; GFX9-NEXT: v_lshrrev_b64 v[8:9], s4, v[2:3] | ; GFX9-NEXT: v_lshrrev_b64 v[8:9], s4, v[2:3] | ||||
; GFX9-NEXT: v_lshrrev_b64 v[2:3], s2, v[2:3] | ; GFX9-NEXT: v_lshrrev_b64 v[2:3], s2, v[2:3] | ||||
Show All 21 Lines | |||||
; GFX10: ; %bb.0: | ; GFX10: ; %bb.0: | ||||
; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f | ; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f | ||||
; GFX10-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] | ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] | ||||
; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] | ; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] | ||||
; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] | ; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] | ||||
; GFX10-NEXT: s_sub_i32 s5, s8, 64 | ; GFX10-NEXT: s_sub_i32 s5, s8, 64 | ||||
; GFX10-NEXT: s_sub_i32 s6, 64, s8 | ; GFX10-NEXT: s_sub_i32 s6, 64, s8 | ||||
; GFX10-NEXT: s_cmp_lt_u32 s8, 64 | ; GFX10-NEXT: s_cmp_lt_u32 s8, 64 | ||||
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 31, v2 | ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 31, v1 | ||||
; GFX10-NEXT: s_cselect_b32 s12, 1, 0 | ; GFX10-NEXT: s_cselect_b32 s12, 1, 0 | ||||
; GFX10-NEXT: s_cmp_eq_u32 s8, 0 | ; GFX10-NEXT: s_cmp_eq_u32 s8, 0 | ||||
; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] | ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] | ||||
; GFX10-NEXT: s_cselect_b32 s13, 1, 0 | ; GFX10-NEXT: s_cselect_b32 s13, 1, 0 | ||||
; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 | ; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 | ||||
; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], s8 | ; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], s8 | ||||
; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 | ; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 | ||||
; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] | ; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] | ||||
; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 | ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 | ||||
; GFX10-NEXT: s_cmp_lg_u32 s12, 0 | ; GFX10-NEXT: s_cmp_lg_u32 s12, 0 | ||||
; GFX10-NEXT: v_or_b32_e32 v1, v1, v4 | ; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] | ||||
; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 | ; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 | ||||
; GFX10-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] | ; GFX10-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] | ||||
; GFX10-NEXT: s_cmp_lg_u32 s13, 0 | ; GFX10-NEXT: s_cmp_lg_u32 s13, 0 | ||||
; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] | ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] | ||||
; GFX10-NEXT: s_sub_i32 s0, 64, s4 | ; GFX10-NEXT: s_sub_i32 s0, 64, s4 | ||||
; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] | |||||
; GFX10-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] | ; GFX10-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] | ||||
; GFX10-NEXT: s_sub_i32 s0, s4, 64 | ; GFX10-NEXT: s_sub_i32 s0, s4, 64 | ||||
; GFX10-NEXT: s_cmp_lt_u32 s4, 64 | ; GFX10-NEXT: s_cmp_lt_u32 s4, 64 | ||||
; GFX10-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] | ; GFX10-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] | ||||
; GFX10-NEXT: s_cselect_b32 s1, 1, 0 | ; GFX10-NEXT: s_cselect_b32 s1, 1, 0 | ||||
; GFX10-NEXT: s_cmp_eq_u32 s4, 0 | ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 | ||||
; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 | ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 | ||||
; GFX10-NEXT: s_cselect_b32 s5, 1, 0 | ; GFX10-NEXT: s_cselect_b32 s5, 1, 0 | ||||
Show All 21 Lines | |||||
; GFX11: ; %bb.0: | ; GFX11: ; %bb.0: | ||||
; GFX11-NEXT: s_mov_b64 s[6:7], 0x7f | ; GFX11-NEXT: s_mov_b64 s[6:7], 0x7f | ||||
; GFX11-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] | ; GFX11-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] | ||||
; GFX11-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] | ; GFX11-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] | ||||
; GFX11-NEXT: s_and_not1_b64 s[4:5], s[6:7], s[4:5] | ; GFX11-NEXT: s_and_not1_b64 s[4:5], s[6:7], s[4:5] | ||||
; GFX11-NEXT: s_sub_i32 s5, s8, 64 | ; GFX11-NEXT: s_sub_i32 s5, s8, 64 | ||||
; GFX11-NEXT: s_sub_i32 s6, 64, s8 | ; GFX11-NEXT: s_sub_i32 s6, 64, s8 | ||||
; GFX11-NEXT: s_cmp_lt_u32 s8, 64 | ; GFX11-NEXT: s_cmp_lt_u32 s8, 64 | ||||
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 31, v2 | ; GFX11-NEXT: v_lshl_or_b32 v1, v2, 31, v1 | ||||
; GFX11-NEXT: s_cselect_b32 s12, 1, 0 | ; GFX11-NEXT: s_cselect_b32 s12, 1, 0 | ||||
; GFX11-NEXT: s_cmp_eq_u32 s8, 0 | ; GFX11-NEXT: s_cmp_eq_u32 s8, 0 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] | ; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] | ||||
; GFX11-NEXT: s_cselect_b32 s13, 1, 0 | ; GFX11-NEXT: s_cselect_b32 s13, 1, 0 | ||||
; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 | ; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 | ||||
; GFX11-NEXT: s_lshl_b64 s[10:11], s[2:3], s8 | ; GFX11-NEXT: s_lshl_b64 s[10:11], s[2:3], s8 | ||||
; GFX11-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 | ; GFX11-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 | ||||
; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] | ; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] | ||||
; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 | ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 | ||||
; GFX11-NEXT: s_cmp_lg_u32 s12, 0 | ; GFX11-NEXT: s_cmp_lg_u32 s12, 0 | ||||
; GFX11-NEXT: v_or_b32_e32 v1, v1, v4 | ; GFX11-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] | ||||
; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 | ; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 | ||||
; GFX11-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] | ; GFX11-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] | ||||
; GFX11-NEXT: s_cmp_lg_u32 s13, 0 | ; GFX11-NEXT: s_cmp_lg_u32 s13, 0 | ||||
; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] | ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] | ||||
; GFX11-NEXT: s_sub_i32 s0, 64, s4 | ; GFX11-NEXT: s_sub_i32 s0, 64, s4 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] | ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | ||||
; GFX11-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] | ; GFX11-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] | ||||
; GFX11-NEXT: s_sub_i32 s0, s4, 64 | ; GFX11-NEXT: s_sub_i32 s0, s4, 64 | ||||
; GFX11-NEXT: s_cmp_lt_u32 s4, 64 | ; GFX11-NEXT: s_cmp_lt_u32 s4, 64 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] | ; GFX11-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] | ||||
; GFX11-NEXT: s_cselect_b32 s1, 1, 0 | ; GFX11-NEXT: s_cselect_b32 s1, 1, 0 | ||||
; GFX11-NEXT: s_cmp_eq_u32 s4, 0 | ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 | ||||
; GFX11-NEXT: v_or_b32_e32 v4, v4, v6 | ; GFX11-NEXT: v_or_b32_e32 v4, v4, v6 | ||||
; GFX11-NEXT: s_cselect_b32 s5, 1, 0 | ; GFX11-NEXT: s_cselect_b32 s5, 1, 0 | ||||
; GFX11-NEXT: s_and_b32 s0, 1, s1 | ; GFX11-NEXT: s_and_b32 s0, 1, s1 | ||||
; GFX11-NEXT: v_or_b32_e32 v5, v5, v7 | ; GFX11-NEXT: v_or_b32_e32 v5, v5, v7 | ||||
; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 | ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 | ||||
; GFX11-NEXT: s_and_b32 s0, 1, s5 | ; GFX11-NEXT: s_and_b32 s0, 1, s5 | ||||
; GFX11-NEXT: s_and_b32 s1, 1, s1 | ; GFX11-NEXT: s_and_b32 s1, 1, s1 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] | ; GFX11-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] | ||||
; GFX11-NEXT: v_dual_cndmask_b32 v5, v9, v5 :: v_dual_cndmask_b32 v4, v8, v4 | ; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5 | ||||
; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 | ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 | ||||
; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 | ; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) | ||||
; GFX11-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v0, v4, v0 | ; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 | ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 | ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) | ||||
; GFX11-NEXT: v_or_b32_e32 v1, s9, v1 | |||||
; GFX11-NEXT: v_or_b32_e32 v0, s8, v0 | ; GFX11-NEXT: v_or_b32_e32 v0, s8, v0 | ||||
; GFX11-NEXT: v_or_b32_e32 v1, s9, v1 | |||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | ||||
; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 | ; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 | ||||
; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 | ; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 | ||||
; GFX11-NEXT: ; return to shader part epilog | ; GFX11-NEXT: ; return to shader part epilog | ||||
%result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) | %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) | ||||
%cast.result = bitcast i128 %result to <4 x float> | %cast.result = bitcast i128 %result to <4 x float> | ||||
ret <4 x float> %cast.result | ret <4 x float> %cast.result | ||||
} | } | ||||
▲ Show 20 Lines • Show All 1,030 Lines • ▼ Show 20 Lines | |||||
; GFX8-NEXT: v_or_b32_e32 v6, v16, v6 | ; GFX8-NEXT: v_or_b32_e32 v6, v16, v6 | ||||
; GFX8-NEXT: v_or_b32_e32 v7, v20, v7 | ; GFX8-NEXT: v_or_b32_e32 v7, v20, v7 | ||||
; GFX8-NEXT: s_setpc_b64 s[30:31] | ; GFX8-NEXT: s_setpc_b64 s[30:31] | ||||
; | ; | ||||
; GFX9-LABEL: v_fshl_v2i128: | ; GFX9-LABEL: v_fshl_v2i128: | ||||
; GFX9: ; %bb.0: | ; GFX9: ; %bb.0: | ||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||||
; GFX9-NEXT: v_and_b32_e32 v23, 0x7f, v16 | ; GFX9-NEXT: v_and_b32_e32 v23, 0x7f, v16 | ||||
; GFX9-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] | |||||
; GFX9-NEXT: v_sub_u32_e32 v17, 64, v23 | ; GFX9-NEXT: v_sub_u32_e32 v17, 64, v23 | ||||
; GFX9-NEXT: v_xor_b32_e32 v16, -1, v16 | |||||
; GFX9-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1] | ; GFX9-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1] | ||||
; GFX9-NEXT: v_lshlrev_b64 v[21:22], v23, v[2:3] | ; GFX9-NEXT: v_lshlrev_b64 v[21:22], v23, v[2:3] | ||||
; GFX9-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] | |||||
; GFX9-NEXT: v_xor_b32_e32 v16, -1, v16 | |||||
; GFX9-NEXT: v_or_b32_e32 v21, v17, v21 | |||||
; GFX9-NEXT: v_lshlrev_b32_e32 v17, 31, v10 | |||||
; GFX9-NEXT: v_and_b32_e32 v24, 0x7f, v16 | ; GFX9-NEXT: v_and_b32_e32 v24, 0x7f, v16 | ||||
; GFX9-NEXT: v_lshl_or_b32 v9, v10, 31, v9 | |||||
; GFX9-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] | ; GFX9-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] | ||||
; GFX9-NEXT: v_or_b32_e32 v9, v9, v17 | |||||
; GFX9-NEXT: v_sub_u32_e32 v16, 64, v24 | ; GFX9-NEXT: v_sub_u32_e32 v16, 64, v24 | ||||
; GFX9-NEXT: v_or_b32_e32 v21, v17, v21 | |||||
; GFX9-NEXT: v_or_b32_e32 v22, v18, v22 | ; GFX9-NEXT: v_or_b32_e32 v22, v18, v22 | ||||
; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[10:11] | ; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[10:11] | ||||
; GFX9-NEXT: v_lshrrev_b64 v[18:19], v24, v[8:9] | ; GFX9-NEXT: v_lshrrev_b64 v[18:19], v24, v[8:9] | ||||
; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 | ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 | ||||
; GFX9-NEXT: v_or_b32_e32 v18, v18, v16 | ; GFX9-NEXT: v_or_b32_e32 v18, v18, v16 | ||||
; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v23 | ; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v23 | ||||
; GFX9-NEXT: v_or_b32_e32 v19, v19, v17 | ; GFX9-NEXT: v_or_b32_e32 v19, v19, v17 | ||||
; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[0:1] | ; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[0:1] | ||||
Show All 30 Lines | |||||
; GFX9-NEXT: v_or_b32_e32 v10, v8, v10 | ; GFX9-NEXT: v_or_b32_e32 v10, v8, v10 | ||||
; GFX9-NEXT: v_or_b32_e32 v11, v9, v11 | ; GFX9-NEXT: v_or_b32_e32 v11, v9, v11 | ||||
; GFX9-NEXT: v_lshlrev_b64 v[8:9], v16, v[4:5] | ; GFX9-NEXT: v_lshlrev_b64 v[8:9], v16, v[4:5] | ||||
; GFX9-NEXT: v_lshlrev_b64 v[4:5], v18, v[4:5] | ; GFX9-NEXT: v_lshlrev_b64 v[4:5], v18, v[4:5] | ||||
; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 | ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v8, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v8, vcc | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v9, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v9, vcc | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v8, v5, v11, vcc | ||||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 | ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v16, v4, v6, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v16, v4, v6, vcc | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v20, v5, v7, vcc | |||||
; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[12:13] | ; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[12:13] | ||||
; GFX9-NEXT: v_lshlrev_b32_e32 v6, 31, v14 | ; GFX9-NEXT: v_cndmask_b32_e32 v12, v8, v7, vcc | ||||
; GFX9-NEXT: v_or_b32_e32 v5, v5, v6 | |||||
; GFX9-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15] | ; GFX9-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15] | ||||
; GFX9-NEXT: v_lshl_or_b32 v5, v14, 31, v5 | |||||
; GFX9-NEXT: v_sub_u32_e32 v10, 64, v17 | ; GFX9-NEXT: v_sub_u32_e32 v10, 64, v17 | ||||
; GFX9-NEXT: v_lshrrev_b64 v[8:9], v17, v[4:5] | ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v17, v[4:5] | ||||
; GFX9-NEXT: v_lshlrev_b64 v[10:11], v10, v[6:7] | ; GFX9-NEXT: v_lshlrev_b64 v[10:11], v10, v[6:7] | ||||
; GFX9-NEXT: v_subrev_u32_e32 v12, 64, v17 | ; GFX9-NEXT: v_subrev_u32_e32 v13, 64, v17 | ||||
; GFX9-NEXT: v_or_b32_e32 v10, v8, v10 | ; GFX9-NEXT: v_or_b32_e32 v10, v8, v10 | ||||
; GFX9-NEXT: v_or_b32_e32 v11, v9, v11 | ; GFX9-NEXT: v_or_b32_e32 v11, v9, v11 | ||||
; GFX9-NEXT: v_lshrrev_b64 v[8:9], v17, v[6:7] | ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v17, v[6:7] | ||||
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v12, v[6:7] | ; GFX9-NEXT: v_lshrrev_b64 v[6:7], v13, v[6:7] | ||||
; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 | ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc | ||||
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v17 | ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v17 | ||||
; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5] | ; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5] | ||||
; GFX9-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5] | ; GFX9-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5] | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc | ||||
; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc | ; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc | ||||
; GFX9-NEXT: v_or_b32_e32 v4, v18, v4 | ; GFX9-NEXT: v_or_b32_e32 v4, v18, v4 | ||||
; GFX9-NEXT: v_or_b32_e32 v5, v19, v5 | ; GFX9-NEXT: v_or_b32_e32 v5, v19, v5 | ||||
; GFX9-NEXT: v_or_b32_e32 v6, v16, v6 | ; GFX9-NEXT: v_or_b32_e32 v6, v16, v6 | ||||
; GFX9-NEXT: v_or_b32_e32 v7, v20, v7 | ; GFX9-NEXT: v_or_b32_e32 v7, v12, v7 | ||||
; GFX9-NEXT: s_setpc_b64 s[30:31] | ; GFX9-NEXT: s_setpc_b64 s[30:31] | ||||
; | ; | ||||
; GFX10-LABEL: v_fshl_v2i128: | ; GFX10-LABEL: v_fshl_v2i128: | ||||
; GFX10: ; %bb.0: | ; GFX10: ; %bb.0: | ||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 | ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 | ||||
; GFX10-NEXT: v_and_b32_e32 v27, 0x7f, v16 | ; GFX10-NEXT: v_and_b32_e32 v27, 0x7f, v16 | ||||
; GFX10-NEXT: v_xor_b32_e32 v16, -1, v16 | ; GFX10-NEXT: v_xor_b32_e32 v16, -1, v16 | ||||
; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] | ; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] | ||||
; GFX10-NEXT: v_lshlrev_b32_e32 v21, 31, v10 | |||||
; GFX10-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] | |||||
; GFX10-NEXT: v_sub_nc_u32_e32 v17, 64, v27 | ; GFX10-NEXT: v_sub_nc_u32_e32 v17, 64, v27 | ||||
; GFX10-NEXT: v_and_b32_e32 v28, 0x7f, v16 | ; GFX10-NEXT: v_and_b32_e32 v28, 0x7f, v16 | ||||
; GFX10-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3] | ; GFX10-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3] | ||||
; GFX10-NEXT: v_or_b32_e32 v9, v9, v21 | ; GFX10-NEXT: v_lshl_or_b32 v9, v10, 31, v9 | ||||
; GFX10-NEXT: v_subrev_nc_u32_e32 v29, 64, v27 | ; GFX10-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] | ||||
; GFX10-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1] | ; GFX10-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1] | ||||
; GFX10-NEXT: v_sub_nc_u32_e32 v25, 64, v28 | ; GFX10-NEXT: v_sub_nc_u32_e32 v25, 64, v28 | ||||
; GFX10-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1] | ; GFX10-NEXT: v_subrev_nc_u32_e32 v29, 64, v27 | ||||
; GFX10-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9] | ; GFX10-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9] | ||||
; GFX10-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1] | ; GFX10-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1] | ||||
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27 | ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27 | ||||
; GFX10-NEXT: v_or_b32_e32 v18, v16, v18 | ; GFX10-NEXT: v_or_b32_e32 v18, v16, v18 | ||||
; GFX10-NEXT: v_subrev_nc_u32_e32 v16, 64, v28 | ; GFX10-NEXT: v_subrev_nc_u32_e32 v16, 64, v28 | ||||
; GFX10-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11] | ; GFX10-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11] | ||||
; GFX10-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1] | |||||
; GFX10-NEXT: v_or_b32_e32 v19, v17, v19 | ; GFX10-NEXT: v_or_b32_e32 v19, v17, v19 | ||||
; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v28 | ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v28 | ||||
; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v28 | |||||
; GFX10-NEXT: v_lshrrev_b64 v[16:17], v16, v[10:11] | ; GFX10-NEXT: v_lshrrev_b64 v[16:17], v16, v[10:11] | ||||
; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v27 | ; GFX10-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo | ||||
; GFX10-NEXT: v_or_b32_e32 v23, v23, v25 | ; GFX10-NEXT: v_or_b32_e32 v23, v23, v25 | ||||
; GFX10-NEXT: v_or_b32_e32 v24, v24, v26 | |||||
; GFX10-NEXT: v_cndmask_b32_e32 v19, v1, v19, vcc_lo | |||||
; GFX10-NEXT: v_cndmask_b32_e32 v18, v0, v18, vcc_lo | ; GFX10-NEXT: v_cndmask_b32_e32 v18, v0, v18, vcc_lo | ||||
; GFX10-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11] | ; GFX10-NEXT: v_or_b32_e32 v0, v24, v26 | ||||
; GFX10-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc_lo | |||||
; GFX10-NEXT: v_cndmask_b32_e32 v19, v1, v19, vcc_lo | |||||
; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, v23, s4 | ; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, v23, s4 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v10, v17, v24, s4 | ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v27 | ||||
; GFX10-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo | ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v28 | ||||
; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v22, vcc_lo | ; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v0, s4 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v22, v19, v3, s6 | ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11] | ||||
; GFX10-NEXT: v_and_b32_e32 v24, 0x7f, v20 | |||||
; GFX10-NEXT: v_cndmask_b32_e32 v23, v19, v3, vcc_lo | |||||
; GFX10-NEXT: v_cndmask_b32_e64 v3, v16, v8, s5 | ; GFX10-NEXT: v_cndmask_b32_e64 v3, v16, v8, s5 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s6 | ; GFX10-NEXT: v_cndmask_b32_e64 v8, v17, v9, s5 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v8, v10, v9, s5 | ; GFX10-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc_lo | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v0, s4 | ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v0, s4 | ||||
; GFX10-NEXT: v_and_b32_e32 v23, 0x7f, v20 | ; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, v1, s4 | ||||
; GFX10-NEXT: v_or_b32_e32 v0, v21, v3 | ; GFX10-NEXT: v_or_b32_e32 v0, v21, v3 | ||||
; GFX10-NEXT: v_xor_b32_e32 v3, -1, v20 | ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v20 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, v1, s4 | ; GFX10-NEXT: v_or_b32_e32 v1, v22, v8 | ||||
; GFX10-NEXT: v_or_b32_e32 v1, v11, v8 | |||||
; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v23 | |||||
; GFX10-NEXT: v_or_b32_e32 v2, v2, v9 | |||||
; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[12:13] | ; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[12:13] | ||||
; GFX10-NEXT: v_lshlrev_b32_e32 v16, 31, v14 | ; GFX10-NEXT: v_sub_nc_u32_e32 v11, 64, v24 | ||||
; GFX10-NEXT: v_and_b32_e32 v25, 0x7f, v3 | ; GFX10-NEXT: v_or_b32_e32 v2, v2, v10 | ||||
; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[4:5] | ; GFX10-NEXT: v_and_b32_e32 v22, 0x7f, v3 | ||||
; GFX10-NEXT: v_lshlrev_b64 v[12:13], v23, v[6:7] | ; GFX10-NEXT: v_lshlrev_b64 v[12:13], v24, v[6:7] | ||||
; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v24 | |||||
; GFX10-NEXT: v_lshrrev_b64 v[10:11], v11, v[4:5] | |||||
; GFX10-NEXT: v_lshl_or_b32 v9, v14, 31, v9 | |||||
; GFX10-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15] | ; GFX10-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15] | ||||
; GFX10-NEXT: v_or_b32_e32 v9, v9, v16 | ; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v22 | ||||
; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v25 | ; GFX10-NEXT: v_lshlrev_b64 v[16:17], v24, v[4:5] | ||||
; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v23 | ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v24 | ||||
; GFX10-NEXT: v_lshlrev_b64 v[16:17], v23, v[4:5] | |||||
; GFX10-NEXT: v_or_b32_e32 v12, v10, v12 | ; GFX10-NEXT: v_or_b32_e32 v12, v10, v12 | ||||
; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v25 | ; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v22 | ||||
; GFX10-NEXT: v_lshrrev_b64 v[18:19], v25, v[8:9] | ; GFX10-NEXT: v_lshrrev_b64 v[18:19], v22, v[8:9] | ||||
; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] | ; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] | ||||
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v23 | |||||
; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] | ; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] | ||||
; GFX10-NEXT: v_or_b32_e32 v5, v11, v13 | ; GFX10-NEXT: v_or_b32_e32 v5, v11, v13 | ||||
; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15] | ; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15] | ||||
; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v25 | |||||
; GFX10-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo | ; GFX10-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo | ||||
; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v22 | |||||
; GFX10-NEXT: v_or_b32_e32 v16, v18, v20 | ; GFX10-NEXT: v_or_b32_e32 v16, v18, v20 | ||||
; GFX10-NEXT: v_or_b32_e32 v18, v19, v21 | ; GFX10-NEXT: v_or_b32_e32 v18, v19, v21 | ||||
; GFX10-NEXT: v_cndmask_b32_e32 v12, v3, v12, vcc_lo | ; GFX10-NEXT: v_cndmask_b32_e32 v12, v3, v12, vcc_lo | ||||
; GFX10-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo | ; GFX10-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo | ||||
; GFX10-NEXT: v_lshrrev_b64 v[3:4], v25, v[14:15] | ; GFX10-NEXT: v_lshrrev_b64 v[3:4], v22, v[14:15] | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v16, s4 | ; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v16, s4 | ||||
; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v25 | ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v22 | ||||
; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v23 | ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v24 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v18, s4 | ; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v18, s4 | ||||
; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo | ; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, v6, s6 | ; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, v6, s6 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v7, v5, v7, s6 | ; GFX10-NEXT: v_cndmask_b32_e64 v7, v5, v7, s6 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v5, v10, v8, s5 | ; GFX10-NEXT: v_cndmask_b32_e64 v5, v10, v8, s5 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v8, v11, v9, s5 | ; GFX10-NEXT: v_cndmask_b32_e64 v8, v11, v9, s5 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v3, s4 | ; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v3, s4 | ||||
; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v4, s4 | ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v4, s4 | ||||
; GFX10-NEXT: v_or_b32_e32 v3, v22, v24 | ; GFX10-NEXT: v_or_b32_e32 v3, v23, v25 | ||||
; GFX10-NEXT: v_or_b32_e32 v4, v13, v5 | ; GFX10-NEXT: v_or_b32_e32 v4, v13, v5 | ||||
; GFX10-NEXT: v_or_b32_e32 v5, v14, v8 | ; GFX10-NEXT: v_or_b32_e32 v5, v14, v8 | ||||
; GFX10-NEXT: v_or_b32_e32 v6, v6, v9 | ; GFX10-NEXT: v_or_b32_e32 v6, v6, v9 | ||||
; GFX10-NEXT: v_or_b32_e32 v7, v7, v10 | ; GFX10-NEXT: v_or_b32_e32 v7, v7, v10 | ||||
; GFX10-NEXT: s_setpc_b64 s[30:31] | ; GFX10-NEXT: s_setpc_b64 s[30:31] | ||||
; | ; | ||||
; GFX11-LABEL: v_fshl_v2i128: | ; GFX11-LABEL: v_fshl_v2i128: | ||||
; GFX11: ; %bb.0: | ; GFX11: ; %bb.0: | ||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||||
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 | ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] | |||||
; GFX11-NEXT: v_and_b32_e32 v27, 0x7f, v16 | ; GFX11-NEXT: v_and_b32_e32 v27, 0x7f, v16 | ||||
; GFX11-NEXT: v_lshlrev_b32_e32 v21, 31, v10 | |||||
; GFX11-NEXT: v_xor_b32_e32 v16, -1, v16 | ; GFX11-NEXT: v_xor_b32_e32 v16, -1, v16 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] | ; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) | ||||
; GFX11-NEXT: v_sub_nc_u32_e32 v17, 64, v27 | |||||
; GFX11-NEXT: v_or_b32_e32 v9, v9, v21 | |||||
; GFX11-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1] | ; GFX11-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1] | ||||
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27 | ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27 | ||||
; GFX11-NEXT: v_and_b32_e32 v28, 0x7f, v16 | ; GFX11-NEXT: v_and_b32_e32 v28, 0x7f, v16 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1] | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | ||||
; GFX11-NEXT: v_lshl_or_b32 v9, v10, 31, v9 | |||||
; GFX11-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] | |||||
; GFX11-NEXT: v_dual_cndmask_b32 v21, 0, v21 :: v_dual_cndmask_b32 v22, 0, v22 | |||||
; GFX11-NEXT: v_sub_nc_u32_e32 v17, 64, v27 | |||||
; GFX11-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3] | ; GFX11-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3] | ||||
; GFX11-NEXT: v_subrev_nc_u32_e32 v29, 64, v27 | |||||
; GFX11-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo | |||||
; GFX11-NEXT: v_sub_nc_u32_e32 v25, 64, v28 | ; GFX11-NEXT: v_sub_nc_u32_e32 v25, 64, v28 | ||||
; GFX11-NEXT: v_subrev_nc_u32_e32 v29, 64, v27 | |||||
; GFX11-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9] | ; GFX11-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9] | ||||
; GFX11-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1] | |||||
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v28 | ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v28 | ||||
; GFX11-NEXT: v_or_b32_e32 v18, v16, v18 | |||||
; GFX11-NEXT: v_subrev_nc_u32_e32 v16, 64, v28 | |||||
; GFX11-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11] | ; GFX11-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11] | ||||
; GFX11-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1] | ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1] | ||||
; GFX11-NEXT: v_or_b32_e32 v19, v17, v19 | |||||
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v28 | ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v28 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[16:17], v16, v[10:11] | ; GFX11-NEXT: v_or_b32_e32 v18, v16, v18 | ||||
; GFX11-NEXT: v_subrev_nc_u32_e32 v16, 64, v28 | |||||
; GFX11-NEXT: v_or_b32_e32 v19, v17, v19 | |||||
; GFX11-NEXT: v_or_b32_e32 v23, v23, v25 | ; GFX11-NEXT: v_or_b32_e32 v23, v23, v25 | ||||
; GFX11-NEXT: v_or_b32_e32 v24, v24, v26 | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | ||||
; GFX11-NEXT: v_dual_cndmask_b32 v18, v0, v18 :: v_dual_cndmask_b32 v19, v1, v19 | ; GFX11-NEXT: v_cndmask_b32_e32 v18, v0, v18, vcc_lo | ||||
; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v27 | ; GFX11-NEXT: v_lshrrev_b64 v[16:17], v16, v[10:11] | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) | ||||
; GFX11-NEXT: v_cndmask_b32_e32 v19, v1, v19, vcc_lo | |||||
; GFX11-NEXT: v_or_b32_e32 v0, v24, v26 | |||||
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v27 | |||||
; GFX11-NEXT: v_and_b32_e32 v24, 0x7f, v20 | |||||
; GFX11-NEXT: v_cndmask_b32_e64 v16, v16, v23, s0 | ; GFX11-NEXT: v_cndmask_b32_e64 v16, v16, v23, s0 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v17, v17, v0, s0 | |||||
; GFX11-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11] | ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11] | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v10, v17, v24, s0 | ; GFX11-NEXT: v_dual_cndmask_b32 v2, v18, v2 :: v_dual_cndmask_b32 v23, v19, v3 | ||||
; GFX11-NEXT: v_cndmask_b32_e32 v11, 0, v22, vcc_lo | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v22, v19, v3, s2 | |||||
; GFX11-NEXT: v_cndmask_b32_e64 v3, v16, v8, s1 | ; GFX11-NEXT: v_cndmask_b32_e64 v3, v16, v8, s1 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s2 | ; GFX11-NEXT: v_cndmask_b32_e64 v8, v17, v9, s1 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v8, v10, v9, s1 | ; GFX11-NEXT: v_sub_nc_u32_e32 v11, 64, v24 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v0, s0 | ; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v0, s0 | ||||
; GFX11-NEXT: v_and_b32_e32 v23, 0x7f, v20 | ; GFX11-NEXT: v_cndmask_b32_e64 v25, 0, v1, s0 | ||||
; GFX11-NEXT: v_or_b32_e32 v0, v21, v3 | ; GFX11-NEXT: v_or_b32_e32 v0, v21, v3 | ||||
; GFX11-NEXT: v_xor_b32_e32 v3, -1, v20 | ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v20 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v24, 0, v1, s0 | ; GFX11-NEXT: v_or_b32_e32 v1, v22, v8 | ||||
; GFX11-NEXT: v_or_b32_e32 v1, v11, v8 | |||||
; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v23 | |||||
; GFX11-NEXT: v_or_b32_e32 v2, v2, v9 | |||||
; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[12:13] | ; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[12:13] | ||||
; GFX11-NEXT: v_lshlrev_b32_e32 v16, 31, v14 | ; GFX11-NEXT: v_or_b32_e32 v2, v2, v10 | ||||
; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v3 | ; GFX11-NEXT: v_lshrrev_b64 v[10:11], v11, v[4:5] | ||||
; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[4:5] | ; GFX11-NEXT: v_lshlrev_b64 v[12:13], v24, v[6:7] | ||||
; GFX11-NEXT: v_lshlrev_b64 v[12:13], v23, v[6:7] | ; GFX11-NEXT: v_lshlrev_b64 v[16:17], v24, v[4:5] | ||||
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v24 | |||||
; GFX11-NEXT: v_and_b32_e32 v22, 0x7f, v3 | |||||
; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 64, v24 | |||||
; GFX11-NEXT: v_lshl_or_b32 v9, v14, 31, v9 | |||||
; GFX11-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15] | ; GFX11-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15] | ||||
; GFX11-NEXT: v_or_b32_e32 v9, v9, v16 | |||||
; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v25 | |||||
; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 64, v23 | |||||
; GFX11-NEXT: v_lshlrev_b64 v[16:17], v23, v[4:5] | |||||
; GFX11-NEXT: v_or_b32_e32 v12, v10, v12 | ; GFX11-NEXT: v_or_b32_e32 v12, v10, v12 | ||||
; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v25 | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | ||||
; GFX11-NEXT: v_lshrrev_b64 v[18:19], v25, v[8:9] | |||||
; GFX11-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] | |||||
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v23 | |||||
; GFX11-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] | ; GFX11-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] | ||||
; GFX11-NEXT: v_or_b32_e32 v5, v11, v13 | ; GFX11-NEXT: v_or_b32_e32 v5, v11, v13 | ||||
; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15] | |||||
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v25 | |||||
; GFX11-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo | ; GFX11-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo | ||||
; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v22 | |||||
; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v22 | |||||
; GFX11-NEXT: v_lshrrev_b64 v[18:19], v22, v[8:9] | |||||
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v22 | |||||
; GFX11-NEXT: v_cndmask_b32_e32 v12, v3, v12, vcc_lo | |||||
; GFX11-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] | |||||
; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15] | |||||
; GFX11-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo | |||||
; GFX11-NEXT: v_lshrrev_b64 v[3:4], v22, v[14:15] | |||||
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v22 | |||||
; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v24 | |||||
; GFX11-NEXT: v_or_b32_e32 v16, v18, v20 | ; GFX11-NEXT: v_or_b32_e32 v16, v18, v20 | ||||
; GFX11-NEXT: v_or_b32_e32 v18, v19, v21 | ; GFX11-NEXT: v_or_b32_e32 v18, v19, v21 | ||||
; GFX11-NEXT: v_dual_cndmask_b32 v12, v3, v12 :: v_dual_cndmask_b32 v5, v4, v5 | |||||
; GFX11-NEXT: v_lshrrev_b64 v[3:4], v25, v[14:15] | |||||
; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo | ; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | |||||
; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, v6, s2 | |||||
; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0 | ; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0 | ||||
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v25 | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) | ||||
; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v23 | |||||
; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v18, s0 | ; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v18, s0 | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4) | |||||
; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, v6, s2 | |||||
; GFX11-NEXT: v_cndmask_b32_e64 v7, v5, v7, s2 | ; GFX11-NEXT: v_cndmask_b32_e64 v7, v5, v7, s2 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v5, v10, v8, s1 | ; GFX11-NEXT: v_cndmask_b32_e64 v5, v10, v8, s1 | ||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | |||||
; GFX11-NEXT: v_cndmask_b32_e64 v8, v11, v9, s1 | ; GFX11-NEXT: v_cndmask_b32_e64 v8, v11, v9, s1 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v3, s0 | ; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v3, s0 | ||||
; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v4, s0 | ; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v4, s0 | ||||
; GFX11-NEXT: v_or_b32_e32 v3, v22, v24 | ; GFX11-NEXT: v_or_b32_e32 v3, v23, v25 | ||||
; GFX11-NEXT: v_or_b32_e32 v4, v13, v5 | ; GFX11-NEXT: v_or_b32_e32 v4, v13, v5 | ||||
; GFX11-NEXT: v_or_b32_e32 v5, v14, v8 | ; GFX11-NEXT: v_or_b32_e32 v5, v14, v8 | ||||
; GFX11-NEXT: v_or_b32_e32 v6, v6, v9 | ; GFX11-NEXT: v_or_b32_e32 v6, v6, v9 | ||||
; GFX11-NEXT: v_or_b32_e32 v7, v7, v10 | ; GFX11-NEXT: v_or_b32_e32 v7, v7, v10 | ||||
; GFX11-NEXT: s_setpc_b64 s[30:31] | ; GFX11-NEXT: s_setpc_b64 s[30:31] | ||||
%result = call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) | %result = call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) | ||||
ret <2 x i128> %result | ret <2 x i128> %result | ||||
} | } | ||||
Show All 33 Lines |