Index: llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -59,6 +59,10 @@ LoopInfo *LI; + // Keep track of basic block terminators whose condition has been + // replaced by a call to the if intrinsic. + DenseSet TermWithIfAdded; + void initialize(Module &M, const GCNSubtarget &ST); bool isUniform(BranchInst *T); @@ -203,6 +207,10 @@ /// Open a new "If" block bool SIAnnotateControlFlow::openIf(BranchInst *Term) { + if (TermWithIfAdded.contains(Term)) + return false; + TermWithIfAdded.insert(Term); + if (isUniform(Term)) return false; @@ -358,8 +366,13 @@ if (isTopOfStack(BB)) Changed |= closeControlFlow(BB); - if (DT->dominates(Term->getSuccessor(1), BB)) + if (DT->dominates(Term->getSuccessor(1), BB)) { Changed |= handleLoop(Term); + continue; + } + + if (!TermWithIfAdded.contains(Term)) + Changed |= openIf(Term); continue; } @@ -377,10 +390,8 @@ Changed |= openIf(Term); } - if (!Stack.empty()) { - // CFG was probably not structured.
- report_fatal_error("failed to annotate CFG"); - } + for (auto I : Stack) + Changed |= closeControlFlow(I.first); return Changed; } Index: llvm/test/CodeGen/AMDGPU/si-annotate-cf-else-visited.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/si-annotate-cf-else-visited.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs | FileCheck %s + +; CHECK-LABEL: kernel: ; @kernel +; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dword s1, s[4:5], 0x10 +; CHECK-NEXT: s_load_dword s0, s[4:5], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_cmpk_lg_i32 s1, 0x100 +; CHECK-NEXT: s_cbranch_scc1 .LBB0_3 +; CHECK-NEXT: ; %bb.1: ; %if.then +; CHECK-NEXT: s_cmp_eq_u32 s0, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB0_5 +; CHECK-NEXT: ; %bb.2: ; %cond.false +; CHECK-NEXT: s_trap 2 +; CHECK-NEXT: .LBB0_3: ; %if.else +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 10, v0 +; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc +; CHECK-NEXT: s_cbranch_execz .LBB0_6 +; CHECK-NEXT: ; %bb.4: ; %if.then3 +; CHECK-NEXT: s_cmp_eq_u32 s0, 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB0_7 +; CHECK-NEXT: .LBB0_5: ; %if.end6.sink.split +; CHECK-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; CHECK-NEXT: v_mov_b32_e32 v1, s0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_dword v0, v1, s[2:3] +; CHECK-NEXT: .LBB0_6: ; %UnifiedReturnBlock +; CHECK-NEXT: s_endpgm +; CHECK-NEXT: .LBB0_7: ; %cond.false.i8 +; CHECK-NEXT: s_trap 2 +; CHECK-NEXT: ; divergent unreachable +; CHECK-NEXT: s_endpgm + +declare void @llvm.trap() +declare i32 @llvm.amdgcn.workitem.id.x() + +define amdgpu_kernel void @kernel(i32 %a, i32 addrspace(1)* %x, i32 noundef %n) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %cmp = icmp eq i32 %n, 256 + br i1 %cmp, label %if.then, label %if.else + +if.then: + 
%cmp1 = icmp eq i32 %a, 0 + br i1 %cmp1, label %if.end6.sink.split, label %cond.false + +cond.false: + call void @llvm.trap() + unreachable + +if.else: + %cmp2 = icmp ult i32 %tid, 10 + br i1 %cmp2, label %if.then3, label %if.end6 + +if.then3: + %cmp1.i7 = icmp eq i32 %a, 0 + br i1 %cmp1.i7, label %if.end6.sink.split, label %cond.false.i8 + +cond.false.i8: + call void @llvm.trap() + unreachable + +if.end6.sink.split: + %x1 = getelementptr inbounds i32, i32 addrspace(1)* %x, i32 %tid + store i32 %a, i32 addrspace(1)* %x1, align 4 + br label %if.end6 + +if.end6: + ret void +} Index: llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -142,7 +142,13 @@ ; SI-OPT: bb6: ; SI-OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) ; SI-OPT-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP3]], 3 -; SI-OPT-NEXT: br i1 [[TMP7]], label [[BB11:%.*]], label [[BB1:%.*]] +; SI-OPT-NEXT: [[TMP7A:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP7]]) +; SI-OPT-NEXT: [[TMP7B:%.*]] = extractvalue { i1, i64 } [[TMP7A]], 0 +; SI-OPT-NEXT: [[TMP7C:%.*]] = extractvalue { i1, i64 } [[TMP7A]], 1 +; SI-OPT-NEXT: br i1 [[TMP7B]], label [[BB11:%.*]], label [[BB6_BB1_CRIT_EDGE:%.*]] +; SI-OPT: bb6.bb1_crit_edge: +; SI-OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7C]]) +; SI-OPT-NEXT: br label [[BB1:%.*]] ; SI-OPT: bb8: ; SI-OPT-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP3]], 1 ; SI-OPT-NEXT: [[TMP3:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP9]])