Index: llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -211,8 +211,7 @@ if (!isUniformlyReached(DA, *BB)) ReturningBlocks.push_back(BB); } else if (isa<UnreachableInst>(BB->getTerminator())) { - if (!isUniformlyReached(DA, *BB)) - UnreachableBlocks.push_back(BB); + UnreachableBlocks.push_back(BB); } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext()); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll @@ -50,20 +50,28 @@ define amdgpu_kernel void @sgpr_trunc_brcond(i32 %cond) { ; GCN-LABEL: sgpr_trunc_brcond: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_load_dword s0, s[0:1], 0x9 +; GCN-NEXT: s_load_dword s1, s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s0, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_xor_b32 s1, s1, -1 +; GCN-NEXT: s_and_b32 s1, s1, 1 +; GCN-NEXT: s_cmp_lg_u32 s1, 0 +; GCN-NEXT: s_cbranch_scc0 .LBB3_2 +; GCN-NEXT: ; %bb.1: ; %bb1 +; GCN-NEXT: v_mov_b32_e32 v0, 1 +; GCN-NEXT: s_mov_b32 s0, 0 +; GCN-NEXT: flat_store_dword v[0:1], v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: .LBB3_2: ; %Flow ; GCN-NEXT: s_xor_b32 s0, s0, -1 ; GCN-NEXT: s_and_b32 s0, s0, 1 ; GCN-NEXT: s_cmp_lg_u32 s0, 0 -; GCN-NEXT: s_cbranch_scc1 .LBB3_2 -; GCN-NEXT: ; %bb.1: ; %bb0 +; GCN-NEXT: s_cbranch_scc1 .LBB3_4 +; GCN-NEXT: ; %bb.3: ; %bb0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: flat_store_dword v[0:1], v0 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: .LBB3_2: ; %bb1 -; GCN-NEXT: v_mov_b32_e32 v0, 1 -; GCN-NEXT: flat_store_dword v[0:1], v0 -; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: .LBB3_4: ; %UnifiedUnreachableBlock entry: %trunc 
= trunc i32 %cond to i1 br i1 %trunc, label %bb0, label %bb1 @@ -82,19 +90,27 @@ ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_and_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s0, s1 +; GCN-NEXT: s_xor_b32 s1, s1, -1 +; GCN-NEXT: s_and_b32 s1, s1, 1 +; GCN-NEXT: s_mov_b32 s0, -1 +; GCN-NEXT: s_cmp_lg_u32 s1, 0 +; GCN-NEXT: s_cbranch_scc0 .LBB4_2 +; GCN-NEXT: ; %bb.1: ; %bb1 +; GCN-NEXT: v_mov_b32_e32 v0, 1 +; GCN-NEXT: s_mov_b32 s0, 0 +; GCN-NEXT: flat_store_dword v[0:1], v0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: .LBB4_2: ; %Flow ; GCN-NEXT: s_xor_b32 s0, s0, -1 ; GCN-NEXT: s_and_b32 s0, s0, 1 ; GCN-NEXT: s_cmp_lg_u32 s0, 0 -; GCN-NEXT: s_cbranch_scc1 .LBB4_2 -; GCN-NEXT: ; %bb.1: ; %bb0 +; GCN-NEXT: s_cbranch_scc1 .LBB4_4 +; GCN-NEXT: ; %bb.3: ; %bb0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: flat_store_dword v[0:1], v0 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: .LBB4_2: ; %bb1 -; GCN-NEXT: v_mov_b32_e32 v0, 1 -; GCN-NEXT: flat_store_dword v[0:1], v0 -; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: .LBB4_4: ; %UnifiedUnreachableBlock entry: %trunc0 = trunc i32 %cond0 to i1 %trunc1 = trunc i32 %cond1 to i1 Index: llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll @@ -0,0 +1,137 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY +; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs | FileCheck %s + +declare void @llvm.trap() +declare i32 @llvm.amdgcn.workitem.id.x() + +define amdgpu_kernel void @kernel(i32 %a, i32 addrspace(1)* %x, i32 noundef %n) { +; Make sure unreachable blocks are unified and branch to the unified return block +; 
UNIFY-LABEL: define amdgpu_kernel void @kernel +; UNIFY-NEXT: entry: +; UNIFY-NEXT: %tid = call i32 @llvm.amdgcn.workitem.id.x() +; UNIFY-NEXT: %cmp = icmp eq i32 %n.load, 256 +; UNIFY-NEXT: br i1 %cmp, label %if.then, label %if.else +; UNIFY-LABEL: if.then: +; UNIFY-NEXT: %cmp1 = icmp eq i32 %a.load, 0 +; UNIFY-NEXT: br i1 %cmp1, label %if.end6.sink.split, label %cond.false +; UNIFY-LABEL: cond.false: +; UNIFY-NEXT: call void @llvm.trap() +; UNIFY-NEXT: br label %UnifiedUnreachableBlock +; UNIFY-LABEL: if.else: +; UNIFY-NEXT: %cmp2 = icmp ult i32 %tid, 10 +; UNIFY-NEXT: br i1 %cmp2, label %if.then3, label %UnifiedReturnBlock +; UNIFY-LABEL: if.then3: +; UNIFY-NEXT: %cmp1.i7 = icmp eq i32 %a.load, 0 +; UNIFY-NEXT: br i1 %cmp1.i7, label %if.end6.sink.split, label %cond.false.i8 +; UNIFY-LABEL: cond.false.i8: +; UNIFY-NEXT: call void @llvm.trap() +; UNIFY-NEXT: br label %UnifiedUnreachableBlock +; UNIFY-LABEL: if.end6.sink.split: +; UNIFY-NEXT: %idxprom = sext i32 %tid to i64 +; UNIFY-NEXT: %x1 = getelementptr inbounds i32, i32 addrspace(1)* %x.load, i64 %idxprom +; UNIFY-NEXT: store i32 %a.load, i32 addrspace(1)* %x1, align 4 +; UNIFY-NEXT: br label %UnifiedReturnBlock +; UNIFY-LABEL: UnifiedUnreachableBlock: +; UNIFY-NEXT: call void @llvm.amdgcn.unreachable() +; UNIFY-NEXT: br label %UnifiedReturnBlock +; UNIFY-LABEL: UnifiedReturnBlock: +; UNIFY-NEXT: ret void + +; CHECK-LABEL: kernel: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dword s0, s[4:5], 0x10 +; CHECK-NEXT: s_load_dword s10, s[4:5], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_cmpk_lg_i32 s0, 0x100 +; CHECK-NEXT: s_cbranch_scc0 .LBB0_6 +; CHECK-NEXT: ; %bb.1: ; %if.else +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 10, v0 +; CHECK-NEXT: s_mov_b64 s[6:7], 0 +; CHECK-NEXT: s_mov_b64 s[2:3], 0 +; CHECK-NEXT: s_mov_b64 s[0:1], 0 +; CHECK-NEXT: s_and_saveexec_b64 s[8:9], vcc +; CHECK-NEXT: s_cbranch_execz .LBB0_5 +; CHECK-NEXT: ; %bb.2: ; %if.then3 +; CHECK-NEXT: s_cmp_lg_u32 s10, 0 +; CHECK-NEXT: 
s_cbranch_scc1 .LBB0_14 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: s_mov_b64 s[2:3], 0 +; CHECK-NEXT: s_mov_b64 s[0:1], -1 +; CHECK-NEXT: .LBB0_4: ; %Flow3 +; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec +; CHECK-NEXT: s_and_b64 s[2:3], s[2:3], exec +; CHECK-NEXT: .LBB0_5: ; %Flow2 +; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7] +; CHECK-NEXT: s_cbranch_vccz .LBB0_8 +; CHECK-NEXT: s_branch .LBB0_7 +; CHECK-NEXT: .LBB0_6: +; CHECK-NEXT: s_mov_b64 s[2:3], 0 +; CHECK-NEXT: s_mov_b64 s[0:1], 0 +; CHECK-NEXT: s_cbranch_execz .LBB0_8 +; CHECK-NEXT: .LBB0_7: ; %if.then +; CHECK-NEXT: s_cmp_lg_u32 s10, 0 +; CHECK-NEXT: s_mov_b64 s[0:1], -1 +; CHECK-NEXT: s_cbranch_scc1 .LBB0_13 +; CHECK-NEXT: .LBB0_8: ; %Flow4 +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[2:3] +; CHECK-NEXT: .LBB0_9: ; %UnifiedUnreachableBlock +; CHECK-NEXT: ; divergent unreachable +; CHECK-NEXT: .LBB0_10: ; %Flow6 +; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] +; CHECK-NEXT: s_and_saveexec_b64 s[2:3], s[0:1] +; CHECK-NEXT: s_cbranch_execz .LBB0_12 +; CHECK-NEXT: ; %bb.11: ; %if.end6.sink.split +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; CHECK-NEXT: v_mov_b32_e32 v1, s10 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_dword v0, v1, s[0:1] +; CHECK-NEXT: .LBB0_12: ; %UnifiedReturnBlock +; CHECK-NEXT: s_endpgm +; CHECK-NEXT: .LBB0_13: ; %cond.false +; CHECK-NEXT: s_mov_b64 s[0:1], 0 +; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], exec +; CHECK-NEXT: s_trap 2 +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[2:3] +; CHECK-NEXT: s_cbranch_execnz .LBB0_9 +; CHECK-NEXT: s_branch .LBB0_10 +; CHECK-NEXT: .LBB0_14: ; %cond.false.i8 +; CHECK-NEXT: s_mov_b64 s[2:3], -1 +; CHECK-NEXT: s_mov_b64 s[0:1], 0 +; CHECK-NEXT: s_trap 2 +; CHECK-NEXT: s_branch .LBB0_4 +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %cmp = icmp eq i32 %n, 256 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %cmp1 = icmp eq i32 %a, 0 + br i1 %cmp1, 
label %if.end6.sink.split, label %cond.false + +cond.false: + call void @llvm.trap() + unreachable + +if.else: + %cmp2 = icmp ult i32 %tid, 10 + br i1 %cmp2, label %if.then3, label %if.end6 + +if.then3: + %cmp1.i7 = icmp eq i32 %a, 0 + br i1 %cmp1.i7, label %if.end6.sink.split, label %cond.false.i8 + +cond.false.i8: + call void @llvm.trap() + unreachable + +if.end6.sink.split: + %x1 = getelementptr inbounds i32, i32 addrspace(1)* %x, i32 %tid + store i32 %a, i32 addrspace(1)* %x1, align 4 + br label %if.end6 + +if.end6: + ret void +} Index: llvm/test/CodeGen/AMDGPU/skip-if-dead.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -1292,40 +1292,45 @@ ; SI-LABEL: no_skip_no_successors: ; SI: ; %bb.0: ; %bb ; SI-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 +; SI-NEXT: s_mov_b64 s[2:3], exec +; SI-NEXT: s_mov_b64 s[0:1], -1 ; SI-NEXT: s_and_b64 vcc, exec, s[4:5] ; SI-NEXT: s_cbranch_vccz .LBB12_3 -; SI-NEXT: ; %bb.1: ; %bb6 -; SI-NEXT: s_mov_b64 s[2:3], exec +; SI-NEXT: ; %bb.1: ; %Flow +; SI-NEXT: s_and_b64 vcc, exec, s[0:1] +; SI-NEXT: s_cbranch_vccnz .LBB12_4 +; SI-NEXT: .LBB12_2: ; %UnifiedUnreachableBlock +; SI-NEXT: .LBB12_3: ; %bb3 +; SI-NEXT: s_branch .LBB12_2 +; SI-NEXT: .LBB12_4: ; %bb6 ; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; SI-NEXT: s_cbranch_scc0 .LBB12_5 -; SI-NEXT: ; %bb.2: ; %bb6 +; SI-NEXT: s_cbranch_scc0 .LBB12_6 +; SI-NEXT: ; %bb.5: ; %bb6 ; SI-NEXT: s_mov_b64 exec, 0 -; SI-NEXT: .LBB12_3: ; %bb3 -; SI-NEXT: v_mov_b32_e32 v0, 0x3e7ae148 -; SI-NEXT: v_cmp_nge_f32_e32 vcc, s0, v0 -; SI-NEXT: s_and_b64 vcc, exec, vcc -; SI-NEXT: ; %bb.4: ; %bb5 -; SI-NEXT: .LBB12_5: +; SI-NEXT: .LBB12_6: ; SI-NEXT: s_mov_b64 exec, 0 ; SI-NEXT: exp null off, off, off, off done vm ; SI-NEXT: s_endpgm ; ; GFX10-WAVE64-LABEL: no_skip_no_successors: ; GFX10-WAVE64: ; %bb.0: ; %bb -; GFX10-WAVE64-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 -; GFX10-WAVE64-NEXT: s_and_b64 
vcc, exec, s[4:5] -; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB12_3 -; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb6 +; GFX10-WAVE64-NEXT: v_cmp_nge_f32_e64 s[0:1], s1, 0 ; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec +; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1] +; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], -1 +; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB12_3 +; GFX10-WAVE64-NEXT: ; %bb.1: ; %Flow +; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1] +; GFX10-WAVE64-NEXT: s_cbranch_vccnz .LBB12_4 +; GFX10-WAVE64-NEXT: .LBB12_2: ; %UnifiedUnreachableBlock +; GFX10-WAVE64-NEXT: .LBB12_3: ; %bb3 +; GFX10-WAVE64-NEXT: s_branch .LBB12_2 +; GFX10-WAVE64-NEXT: .LBB12_4: ; %bb6 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB12_5 -; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb6 +; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB12_6 +; GFX10-WAVE64-NEXT: ; %bb.5: ; %bb6 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 -; GFX10-WAVE64-NEXT: .LBB12_3: ; %bb3 -; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0 -; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1] -; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb5 -; GFX10-WAVE64-NEXT: .LBB12_5: +; GFX10-WAVE64-NEXT: .LBB12_6: ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm ; GFX10-WAVE64-NEXT: s_endpgm @@ -1333,42 +1338,46 @@ ; GFX10-WAVE32-LABEL: no_skip_no_successors: ; GFX10-WAVE32: ; %bb.0: ; %bb ; GFX10-WAVE32-NEXT: v_cmp_nge_f32_e64 s1, s1, 0 +; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo ; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1 +; GFX10-WAVE32-NEXT: s_mov_b32 s1, -1 ; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB12_3 -; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb6 -; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo -; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, exec_lo -; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB12_5 -; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb6 -; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 +; GFX10-WAVE32-NEXT: ; %bb.1: ; %Flow +; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1 +; GFX10-WAVE32-NEXT: 
s_cbranch_vccnz .LBB12_4 +; GFX10-WAVE32-NEXT: .LBB12_2: ; %UnifiedUnreachableBlock ; GFX10-WAVE32-NEXT: .LBB12_3: ; %bb3 -; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0 -; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s0 -; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb5 -; GFX10-WAVE32-NEXT: .LBB12_5: +; GFX10-WAVE32-NEXT: s_branch .LBB12_2 +; GFX10-WAVE32-NEXT: .LBB12_4: ; %bb6 +; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo +; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB12_6 +; GFX10-WAVE32-NEXT: ; %bb.5: ; %bb6 +; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 +; GFX10-WAVE32-NEXT: .LBB12_6: ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm ; GFX10-WAVE32-NEXT: s_endpgm ; ; GFX11-LABEL: no_skip_no_successors: ; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 +; GFX11-NEXT: v_cmp_nge_f32_e64 s[0:1], s1, 0 +; GFX11-NEXT: s_mov_b64 s[2:3], exec ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_and_b64 vcc, exec, s[4:5] +; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1] +; GFX11-NEXT: s_mov_b64 s[0:1], -1 ; GFX11-NEXT: s_cbranch_vccz .LBB12_3 -; GFX11-NEXT: ; %bb.1: ; %bb6 -; GFX11-NEXT: s_mov_b64 s[2:3], exec -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: ; %bb.1: ; %Flow +; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1] +; GFX11-NEXT: s_cbranch_vccnz .LBB12_4 +; GFX11-NEXT: .LBB12_2: ; %UnifiedUnreachableBlock +; GFX11-NEXT: .LBB12_3: ; %bb3 +; GFX11-NEXT: s_branch .LBB12_2 +; GFX11-NEXT: .LBB12_4: ; %bb6 ; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec -; GFX11-NEXT: s_cbranch_scc0 .LBB12_5 -; GFX11-NEXT: ; %bb.2: ; %bb6 +; GFX11-NEXT: s_cbranch_scc0 .LBB12_6 +; GFX11-NEXT: ; %bb.5: ; %bb6 ; GFX11-NEXT: s_mov_b64 exec, 0 -; GFX11-NEXT: .LBB12_3: ; %bb3 -; GFX11-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1] -; GFX11-NEXT: ; %bb.4: ; %bb5 -; GFX11-NEXT: .LBB12_5: +; GFX11-NEXT: .LBB12_6: ; GFX11-NEXT: 
s_mov_b64 exec, 0 ; GFX11-NEXT: exp mrt0 off, off, off, off done ; GFX11-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll +++ llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll @@ -1,5 +1,32 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-unify-divergent-exit-nodes -o - %s | FileCheck -check-prefix=UNIFY %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stop-after=structurizecfg -o - %s | FileCheck -check-prefix=STRZ %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s define void @test() #1 { + ; Make sure the unreachable BB branches to the unified return block + ; UNIFY-LABEL: entry.false.blk: ; preds = %entry + ; UNIFY-NEXT: call void @llvm.amdgcn.unreachable() + ; UNIFY-NEXT: br label %UnifiedReturnBlock + + ; Make sure unreachable BB is structurized + ; STRZ-LABEL: entry: + ; STRZ-NEXT: br i1 undef, label %entry.false.blk, label %Flow1 + ; STRZ-LABEL: Flow1: + ; STRZ-NEXT: %0 = phi i1 [ false, %entry.false.blk ], [ true, %entry ] + ; STRZ-NEXT: br i1 %0, label %entry.true.blk, label %UnifiedReturnBlock + ; STRZ-LABEL: entry.true.blk: + ; STRZ-NEXT: %idx = tail call i32 @llvm.amdgcn.workitem.id.x() + ; STRZ-NEXT: %exit.cmp = icmp ult i32 %idx, 3 + ; STRZ-NEXT: br i1 %exit.cmp, label %switch.blk, label %Flow + ; STRZ-LABEL: entry.false.blk: + ; STRZ-NEXT: call void @llvm.amdgcn.unreachable() + ; STRZ-NEXT: br label %Flow1 + ; STRZ-LABEL: switch.blk: + ; STRZ-NEXT: br label %Flow + ; STRZ-LABEL: Flow: + ; STRZ-NEXT: br label %UnifiedReturnBlock + ; STRZ-LABEL: UnifiedReturnBlock: + ; STRZ-NEXT: ret void + ; Clean up the unreachable blocks introduced with LowerSwitch pass. 
; This test ensures that, in the pass flow, UnreachableBlockElim pass ; follows the LowerSwitch. Otherwise, this testcase will crash @@ -8,6 +35,7 @@ ; ; GCN-LABEL: name: test ; GCN: bb.{{[0-9]+}}.entry: + ; GCN: bb.{{[0-9]+}}.Flow1: ; GCN: bb.{{[0-9]+}}.entry.true.blk: ; GCN: bb.{{[0-9]+}}.entry.false.blk: ; GCN: bb.{{[0-9]+}}.switch.blk: @@ -17,7 +45,8 @@ ; GCN-NOT: bb.{{[0-9]+}}.unreach.blk: ; GCN-NOT: PHI - ; GCN: bb.{{[0-9]+}}.exit: + ; GCN: bb.{{[0-9]+}}.Flow: + ; GCN: bb.{{[0-9]+}}.UnifiedReturnBlock: entry: %idx = tail call i32 @llvm.amdgcn.workitem.id.x() #0 br i1 undef, label %entry.true.blk, label %entry.false.blk