Changeset View
Changeset View
Standalone View
Standalone View
llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||||
; RUN: llc -mtriple=amdgcn-amdhsa -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s | ; RUN: llc -mtriple=amdgcn-amdhsa -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s | ||||
; RUN: opt -S -si-annotate-control-flow -mtriple=amdgcn-amdhsa -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI-OPT %s | ; RUN: opt -S -si-annotate-control-flow -mtriple=amdgcn-amdhsa -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI-OPT %s | ||||
define hidden void @widget() { | define hidden void @widget() { | ||||
; GCN-LABEL: widget: | ; GCN-LABEL: widget: | ||||
; GCN: ; %bb.0: ; %bb | ; GCN: ; %bb.0: ; %bb | ||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||||
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 | ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 | ||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill | ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill | ||||
; GCN-NEXT: s_mov_b64 exec, s[4:5] | ; GCN-NEXT: s_mov_b64 exec, s[16:17] | ||||
; GCN-NEXT: v_writelane_b32 v40, s33, 2 | ; GCN-NEXT: v_writelane_b32 v40, s33, 2 | ||||
; GCN-NEXT: s_mov_b32 s33, s32 | ; GCN-NEXT: s_mov_b32 s33, s32 | ||||
; GCN-NEXT: s_add_u32 s32, s32, 0x400 | ; GCN-NEXT: s_add_u32 s32, s32, 0x400 | ||||
; GCN-NEXT: v_writelane_b32 v40, s30, 0 | |||||
; GCN-NEXT: v_writelane_b32 v40, s31, 1 | |||||
; GCN-NEXT: v_mov_b32_e32 v0, 0 | ; GCN-NEXT: v_mov_b32_e32 v0, 0 | ||||
; GCN-NEXT: v_mov_b32_e32 v1, 0 | ; GCN-NEXT: v_mov_b32_e32 v1, 0 | ||||
; GCN-NEXT: flat_load_dword v0, v[0:1] | ; GCN-NEXT: flat_load_dword v0, v[0:1] | ||||
; GCN-NEXT: s_waitcnt vmcnt(0) | ; GCN-NEXT: s_waitcnt vmcnt(0) | ||||
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0 | ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0 | ||||
; GCN-NEXT: s_and_b64 vcc, exec, vcc | ; GCN-NEXT: s_and_b64 vcc, exec, vcc | ||||
; GCN-NEXT: v_writelane_b32 v40, s30, 0 | |||||
; GCN-NEXT: v_writelane_b32 v40, s31, 1 | |||||
; GCN-NEXT: s_cbranch_vccz BB0_3 | ; GCN-NEXT: s_cbranch_vccz BB0_3 | ||||
; GCN-NEXT: ; %bb.1: ; %bb4 | ; GCN-NEXT: ; %bb.1: ; %bb4 | ||||
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 9, v0 | ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 9, v0 | ||||
; GCN-NEXT: s_and_b64 vcc, exec, vcc | ; GCN-NEXT: s_and_b64 vcc, exec, vcc | ||||
; GCN-NEXT: s_cbranch_vccnz BB0_4 | ; GCN-NEXT: s_cbranch_vccnz BB0_4 | ||||
; GCN-NEXT: ; %bb.2: ; %bb7 | ; GCN-NEXT: ; %bb.2: ; %bb7 | ||||
; GCN-NEXT: s_getpc_b64 s[4:5] | ; GCN-NEXT: s_getpc_b64 s[16:17] | ||||
; GCN-NEXT: s_add_u32 s4, s4, wibble@rel32@lo+4 | ; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4 | ||||
; GCN-NEXT: s_addc_u32 s5, s5, wibble@rel32@hi+12 | ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 | ||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] | ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] | ||||
; GCN-NEXT: s_branch BB0_7 | ; GCN-NEXT: s_branch BB0_7 | ||||
; GCN-NEXT: BB0_3: ; %bb2 | ; GCN-NEXT: BB0_3: ; %bb2 | ||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 21, v0 | ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 21, v0 | ||||
; GCN-NEXT: s_and_b64 vcc, exec, vcc | ; GCN-NEXT: s_and_b64 vcc, exec, vcc | ||||
; GCN-NEXT: s_cbranch_vccnz BB0_6 | ; GCN-NEXT: s_cbranch_vccnz BB0_6 | ||||
; GCN-NEXT: BB0_4: ; %bb9 | ; GCN-NEXT: BB0_4: ; %bb9 | ||||
; GCN-NEXT: s_getpc_b64 s[4:5] | ; GCN-NEXT: s_getpc_b64 s[16:17] | ||||
; GCN-NEXT: s_add_u32 s4, s4, wibble@rel32@lo+4 | ; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4 | ||||
; GCN-NEXT: s_addc_u32 s5, s5, wibble@rel32@hi+12 | ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 | ||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] | ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] | ||||
; GCN-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 | ; GCN-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 | ||||
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc | ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc | ||||
; GCN-NEXT: s_cbranch_execnz BB0_7 | ; GCN-NEXT: s_cbranch_execnz BB0_7 | ||||
; GCN-NEXT: ; %bb.5: ; %bb9.bb12_crit_edge | ; GCN-NEXT: ; %bb.5: ; %bb9.bb12_crit_edge | ||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5] | ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] | ||||
; GCN-NEXT: BB0_6: ; %bb12 | ; GCN-NEXT: BB0_6: ; %bb12 | ||||
; GCN-NEXT: v_mov_b32_e32 v2, 0 | ; GCN-NEXT: v_mov_b32_e32 v2, 0 | ||||
; GCN-NEXT: v_mov_b32_e32 v0, 0 | ; GCN-NEXT: v_mov_b32_e32 v0, 0 | ||||
; GCN-NEXT: v_mov_b32_e32 v1, 0 | ; GCN-NEXT: v_mov_b32_e32 v1, 0 | ||||
; GCN-NEXT: flat_store_dword v[0:1], v2 | ; GCN-NEXT: flat_store_dword v[0:1], v2 | ||||
; GCN-NEXT: BB0_7: ; %UnifiedReturnBlock | ; GCN-NEXT: BB0_7: ; %UnifiedReturnBlock | ||||
; GCN-NEXT: v_readlane_b32 s4, v40, 0 | ; GCN-NEXT: v_readlane_b32 s4, v40, 0 | ||||
; GCN-NEXT: v_readlane_b32 s5, v40, 1 | ; GCN-NEXT: v_readlane_b32 s5, v40, 1 | ||||
; GCN-NEXT: s_sub_u32 s32, s32, 0x400 | ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 | ||||
; GCN-NEXT: v_readlane_b32 s33, v40, 2 | ; GCN-NEXT: v_readlane_b32 s33, v40, 2 | ||||
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 | ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 | ||||
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload | ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload | ||||
; GCN-NEXT: s_mov_b64 exec, s[6:7] | ; GCN-NEXT: s_mov_b64 exec, s[6:7] | ||||
; GCN-NEXT: s_waitcnt vmcnt(0) | ; GCN-NEXT: s_waitcnt vmcnt(0) | ||||
; GCN-NEXT: s_setpc_b64 s[4:5] | ; GCN-NEXT: s_setpc_b64 s[4:5] | ||||
; SI-OPT-LABEL: @widget( | ; SI-OPT-LABEL: @widget( | ||||
; SI-OPT-NEXT: bb: | ; SI-OPT-NEXT: bb: | ||||
; SI-OPT-NEXT: [[TMP:%.*]] = load i32, i32 addrspace(1)* null, align 16 | ; SI-OPT-NEXT: [[TMP:%.*]] = load i32, i32 addrspace(1)* null, align 16 | ||||
; SI-OPT-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP]], 21 | ; SI-OPT-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP]], 21 | ||||
; SI-OPT-NEXT: br i1 [[TMP1]], label [[BB4:%.*]], label [[BB2:%.*]] | ; SI-OPT-NEXT: br i1 [[TMP1]], label [[BB4:%.*]], label [[BB2:%.*]] | ||||
; SI-OPT: bb2: | ; SI-OPT: bb2: | ||||
; SI-OPT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP]], 21 | ; SI-OPT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP]], 21 | ||||
; SI-OPT-NEXT: br i1 [[TMP3]], label [[BB12:%.*]], label [[BB9:%.*]] | ; SI-OPT-NEXT: br i1 [[TMP3]], label [[BB12:%.*]], label [[BB9:%.*]] | ||||
▲ Show 20 Lines • Show All 110 Lines • ▼ Show 20 Lines | |||||
; SI-OPT-NEXT: br label [[BB18]] | ; SI-OPT-NEXT: br label [[BB18]] | ||||
; SI-OPT: bb18: | ; SI-OPT: bb18: | ||||
; SI-OPT-NEXT: store float 0x7FF8000000000000, float addrspace(5)* null, align 4 | ; SI-OPT-NEXT: store float 0x7FF8000000000000, float addrspace(5)* null, align 4 | ||||
; SI-OPT-NEXT: br label [[BB2]] | ; SI-OPT-NEXT: br label [[BB2]] | ||||
; | ; | ||||
; GCN-LABEL: blam: | ; GCN-LABEL: blam: | ||||
; GCN: ; %bb.0: ; %bb | ; GCN: ; %bb.0: ; %bb | ||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||||
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 | ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 | ||||
; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill | ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill | ||||
; GCN-NEXT: s_mov_b64 exec, s[4:5] | ; GCN-NEXT: s_mov_b64 exec, s[16:17] | ||||
; GCN-NEXT: v_writelane_b32 v43, s33, 4 | ; GCN-NEXT: v_writelane_b32 v44, s33, 15 | ||||
; GCN-NEXT: s_mov_b32 s33, s32 | ; GCN-NEXT: s_mov_b32 s33, s32 | ||||
; GCN-NEXT: s_add_u32 s32, s32, 0x800 | ; GCN-NEXT: s_add_u32 s32, s32, 0x800 | ||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill | ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill | ||||
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill | ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill | ||||
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill | ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill | ||||
; GCN-NEXT: v_writelane_b32 v43, s34, 0 | ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill | ||||
; GCN-NEXT: v_writelane_b32 v43, s35, 1 | ; GCN-NEXT: v_writelane_b32 v44, s34, 0 | ||||
; GCN-NEXT: v_writelane_b32 v43, s36, 2 | ; GCN-NEXT: v_writelane_b32 v44, s35, 1 | ||||
; GCN-NEXT: v_writelane_b32 v43, s37, 3 | ; GCN-NEXT: v_writelane_b32 v44, s36, 2 | ||||
; GCN-NEXT: v_writelane_b32 v44, s38, 3 | |||||
; GCN-NEXT: v_writelane_b32 v44, s39, 4 | |||||
; GCN-NEXT: v_writelane_b32 v44, s40, 5 | |||||
; GCN-NEXT: v_writelane_b32 v44, s41, 6 | |||||
; GCN-NEXT: v_writelane_b32 v44, s42, 7 | |||||
; GCN-NEXT: v_writelane_b32 v44, s43, 8 | |||||
; GCN-NEXT: v_writelane_b32 v44, s44, 9 | |||||
; GCN-NEXT: v_writelane_b32 v44, s45, 10 | |||||
; GCN-NEXT: v_writelane_b32 v44, s46, 11 | |||||
; GCN-NEXT: v_writelane_b32 v44, s47, 12 | |||||
; GCN-NEXT: v_writelane_b32 v44, s48, 13 | |||||
; GCN-NEXT: v_writelane_b32 v44, s49, 14 | |||||
; GCN-NEXT: v_mov_b32_e32 v40, v31 | |||||
; GCN-NEXT: s_mov_b32 s34, s14 | |||||
; GCN-NEXT: s_mov_b32 s35, s13 | |||||
; GCN-NEXT: s_mov_b32 s36, s12 | |||||
; GCN-NEXT: s_mov_b64 s[38:39], s[10:11] | |||||
; GCN-NEXT: s_mov_b64 s[40:41], s[8:9] | |||||
; GCN-NEXT: s_mov_b64 s[42:43], s[6:7] | |||||
; GCN-NEXT: s_mov_b64 s[44:45], s[4:5] | |||||
; GCN-NEXT: s_mov_b64 s[4:5], 0 | ; GCN-NEXT: s_mov_b64 s[4:5], 0 | ||||
; GCN-NEXT: v_mov_b32_e32 v0, 0 | |||||
; GCN-NEXT: v_mov_b32_e32 v1, 0 | ; GCN-NEXT: v_mov_b32_e32 v1, 0 | ||||
; GCN-NEXT: v_mov_b32_e32 v2, 0 | ; GCN-NEXT: v_and_b32_e32 v2, 0x3ff, v40 | ||||
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0 | ; GCN-NEXT: flat_load_dword v41, v[0:1] | ||||
; GCN-NEXT: flat_load_dword v40, v[1:2] | ; GCN-NEXT: v_mov_b32_e32 v43, 0 | ||||
; GCN-NEXT: v_mov_b32_e32 v42, 0 | ; GCN-NEXT: s_getpc_b64 s[48:49] | ||||
; GCN-NEXT: s_getpc_b64 s[36:37] | ; GCN-NEXT: s_add_u32 s48, s48, spam@rel32@lo+4 | ||||
; GCN-NEXT: s_add_u32 s36, s36, spam@rel32@lo+4 | ; GCN-NEXT: s_addc_u32 s49, s49, spam@rel32@hi+12 | ||||
; GCN-NEXT: s_addc_u32 s37, s37, spam@rel32@hi+12 | ; GCN-NEXT: v_lshlrev_b32_e32 v42, 2, v2 | ||||
; GCN-NEXT: v_lshlrev_b32_e32 v41, 2, v0 | |||||
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | ||||
; GCN-NEXT: v_cmp_eq_f32_e64 s[34:35], 0, v40 | ; GCN-NEXT: v_cmp_eq_f32_e64 s[46:47], 0, v41 | ||||
; GCN-NEXT: s_branch BB1_3 | ; GCN-NEXT: s_branch BB1_3 | ||||
; GCN-NEXT: BB1_1: ; %bb10 | ; GCN-NEXT: BB1_1: ; %bb10 | ||||
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1 | ; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1 | ||||
; GCN-NEXT: s_or_b64 exec, exec, s[6:7] | ; GCN-NEXT: s_or_b64 exec, exec, s[6:7] | ||||
; GCN-NEXT: v_mov_b32_e32 v0, 0x7fc00000 | ; GCN-NEXT: v_mov_b32_e32 v0, 0x7fc00000 | ||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 | ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 | ||||
; GCN-NEXT: BB1_2: ; %bb18 | ; GCN-NEXT: BB1_2: ; %bb18 | ||||
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1 | ; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1 | ||||
; GCN-NEXT: v_mov_b32_e32 v0, 0x7fc00000 | ; GCN-NEXT: v_mov_b32_e32 v0, 0x7fc00000 | ||||
; GCN-NEXT: s_mov_b64 s[4:5], 0 | ; GCN-NEXT: s_mov_b64 s[4:5], 0 | ||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 | ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 | ||||
; GCN-NEXT: BB1_3: ; %bb2 | ; GCN-NEXT: BB1_3: ; %bb2 | ||||
; GCN-NEXT: ; =>This Loop Header: Depth=1 | ; GCN-NEXT: ; =>This Loop Header: Depth=1 | ||||
; GCN-NEXT: ; Child Loop BB1_4 Depth 2 | ; GCN-NEXT: ; Child Loop BB1_4 Depth 2 | ||||
; GCN-NEXT: s_mov_b64 s[6:7], 0 | ; GCN-NEXT: s_mov_b64 s[6:7], 0 | ||||
; GCN-NEXT: BB1_4: ; %bb2 | ; GCN-NEXT: BB1_4: ; %bb2 | ||||
; GCN-NEXT: ; Parent Loop BB1_3 Depth=1 | ; GCN-NEXT: ; Parent Loop BB1_3 Depth=1 | ||||
; GCN-NEXT: ; => This Inner Loop Header: Depth=2 | ; GCN-NEXT: ; => This Inner Loop Header: Depth=2 | ||||
; GCN-NEXT: flat_load_dword v0, v[41:42] | ; GCN-NEXT: flat_load_dword v0, v[42:43] | ||||
; GCN-NEXT: v_mov_b32_e32 v1, 0 | ; GCN-NEXT: v_mov_b32_e32 v1, 0 | ||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 | ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 | ||||
; GCN-NEXT: s_waitcnt vmcnt(1) | ; GCN-NEXT: s_waitcnt vmcnt(1) | ||||
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 3, v0 | ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 3, v0 | ||||
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc | ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc | ||||
; GCN-NEXT: s_cbranch_execz BB1_6 | ; GCN-NEXT: s_cbranch_execz BB1_6 | ||||
; GCN-NEXT: ; %bb.5: ; %bb8 | ; GCN-NEXT: ; %bb.5: ; %bb8 | ||||
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2 | ; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2 | ||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 | ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 | ||||
; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7] | ; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7] | ||||
; GCN-NEXT: s_mov_b64 s[4:5], 0 | ; GCN-NEXT: s_mov_b64 s[4:5], 0 | ||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7] | ; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7] | ||||
; GCN-NEXT: s_cbranch_execnz BB1_4 | ; GCN-NEXT: s_cbranch_execnz BB1_4 | ||||
; GCN-NEXT: s_branch BB1_1 | ; GCN-NEXT: s_branch BB1_1 | ||||
; GCN-NEXT: BB1_6: ; %bb6 | ; GCN-NEXT: BB1_6: ; %bb6 | ||||
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2 | ; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2 | ||||
; GCN-NEXT: s_or_b64 exec, exec, s[8:9] | ; GCN-NEXT: s_or_b64 exec, exec, s[8:9] | ||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 | ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 | ||||
; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5] | ; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5] | ||||
; GCN-NEXT: s_mov_b64 s[6:7], 0 | ; GCN-NEXT: s_mov_b64 s[6:7], 0 | ||||
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5] | ; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5] | ||||
; GCN-NEXT: s_cbranch_execnz BB1_4 | ; GCN-NEXT: s_cbranch_execnz BB1_4 | ||||
; GCN-NEXT: ; %bb.7: ; %bb11 | ; GCN-NEXT: ; %bb.7: ; %bb11 | ||||
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2 | ; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2 | ||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5] | ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] | ||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[36:37] | ; GCN-NEXT: s_mov_b64 s[4:5], s[44:45] | ||||
; GCN-NEXT: s_mov_b64 s[6:7], s[42:43] | |||||
; GCN-NEXT: s_mov_b64 s[8:9], s[40:41] | |||||
; GCN-NEXT: s_mov_b64 s[10:11], s[38:39] | |||||
; GCN-NEXT: s_mov_b32 s12, s36 | |||||
; GCN-NEXT: s_mov_b32 s13, s35 | |||||
; GCN-NEXT: s_mov_b32 s14, s34 | |||||
; GCN-NEXT: v_mov_b32_e32 v31, v40 | |||||
; GCN-NEXT: s_swappc_b64 s[30:31], s[48:49] | |||||
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 | ; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 | ||||
; GCN-NEXT: s_mov_b64 s[4:5], 0 | ; GCN-NEXT: s_mov_b64 s[4:5], 0 | ||||
; GCN-NEXT: s_mov_b64 s[6:7], 0 | ; GCN-NEXT: s_mov_b64 s[6:7], 0 | ||||
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc | ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc | ||||
; GCN-NEXT: s_cbranch_execnz BB1_4 | ; GCN-NEXT: s_cbranch_execnz BB1_4 | ||||
; GCN-NEXT: ; %bb.8: ; %bb14 | ; GCN-NEXT: ; %bb.8: ; %bb14 | ||||
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1 | ; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1 | ||||
; GCN-NEXT: s_or_b64 exec, exec, s[8:9] | ; GCN-NEXT: s_or_b64 exec, exec, s[8:9] | ||||
; GCN-NEXT: s_and_saveexec_b64 s[4:5], s[34:35] | ; GCN-NEXT: s_and_saveexec_b64 s[4:5], s[46:47] | ||||
; GCN-NEXT: s_cbranch_execnz BB1_10 | ; GCN-NEXT: s_cbranch_execnz BB1_10 | ||||
; GCN-NEXT: ; %bb.9: ; %bb16 | ; GCN-NEXT: ; %bb.9: ; %bb16 | ||||
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1 | ; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1 | ||||
; GCN-NEXT: s_or_b64 exec, exec, s[4:5] | ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] | ||||
; GCN-NEXT: v_mov_b32_e32 v0, 0x7fc00000 | ; GCN-NEXT: v_mov_b32_e32 v0, 0x7fc00000 | ||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 | ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 | ||||
; GCN-NEXT: BB1_10: ; %bb17 | ; GCN-NEXT: BB1_10: ; %bb17 | ||||
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1 | ; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1 | ||||
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], 0 | ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], 0 | ||||
; GCN-NEXT: s_branch BB1_2 | ; GCN-NEXT: s_branch BB1_2 | ||||
bb: | bb: | ||||
%tmp = load float, float* null, align 16 | %tmp = load float, float* null, align 16 | ||||
br label %bb2 | br label %bb2 | ||||
bb1: ; preds = %bb8, %bb6 | bb1: ; preds = %bb8, %bb6 | ||||
br label %bb2 | br label %bb2 | ||||
bb2: | bb2: | ||||
▲ Show 20 Lines • Show All 47 Lines • Show Last 20 Lines |