AMDGPU/SIAnnotateControlFlow: close every stacked control flow for a block

For a block whose terminator is not a conditional branch, the pass used to
call closeControlFlow() at most once when the block was on top of the stack.
This patch keeps calling it for as long as the block stays on top, because
the same block can be pushed more than once when two different branches
share a then/else block.

The new test exercises that shape: bb22 is a successor of the conditional
branches in both bb9 and bb18.

NOTE(review): the two blank context lines in the first hunk and the
<BranchInst> template argument were restored from the hunk line counts and
the surrounding code; confirm against the upstream file before applying.

diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -348,7 +348,10 @@
     BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
 
     if (!Term || Term->isUnconditional()) {
-      if (isTopOfStack(BB))
+      // We use 'while' here because we may need to close multiple
+      // control flows, e.g. when two different branches use the same
+      // then/else block, we'd have pushed that block twice on the stack.
+      while (isTopOfStack(BB))
         Changed |= closeControlFlow(BB);
 
       continue;
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-control-flow-condition-common-blocks.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-control-flow-condition-common-blocks.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-control-flow-condition-common-blocks.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name OPT
+; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
+
+define amdgpu_kernel void @ham(i64 %arg1, i64 %arg2) {
+; OPT-LABEL: @ham(
+; OPT-NEXT:  bb:
+; OPT-NEXT:    [[TMP:%.*]] = sub i64 [[ARG1:%.*]], 0
+; OPT-NEXT:    [[OPTTMP3:%.*]] = tail call i64 @llvm.smin.i64(i64 [[TMP]], i64 256)
+; OPT-NEXT:    [[OPTTMP4:%.*]] = trunc i64 [[OPTTMP3]] to i32
+; OPT-NEXT:    [[OPTTMP5:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
+; OPT-NEXT:    br i1 undef, label [[BB6:%.*]], label [[BB23:%.*]]
+; OPT:       bb6:
+; OPT-NEXT:    br label [[NODEBLOCK:%.*]]
+; OPT:       NodeBlock:
+; OPT-NEXT:    [[PIVOT:%.*]] = icmp slt i64 undef, 1
+; OPT-NEXT:    br i1 [[PIVOT]], label [[LEAFBLOCK:%.*]], label [[LEAFBLOCK1:%.*]]
+; OPT:       LeafBlock1:
+; OPT-NEXT:    [[SWITCHLEAF2:%.*]] = icmp eq i64 undef, 1
+; OPT-NEXT:    br i1 [[SWITCHLEAF2]], label [[BB8:%.*]], label [[BB9:%.*]]
+; OPT:       LeafBlock:
+; OPT-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i64 undef, 0
+; OPT-NEXT:    br i1 [[SWITCHLEAF]], label [[BB7:%.*]], label [[BB9]]
+; OPT:       bb7:
+; OPT-NEXT:    unreachable
+; OPT:       bb8:
+; OPT-NEXT:    unreachable
+; OPT:       bb9:
+; OPT-NEXT:    [[OPTTMP10:%.*]] = call contract noundef double @wombat()
+; OPT-NEXT:    [[OPTTMP11:%.*]] = fcmp contract une double [[OPTTMP10]], 0.000000e+00
+; OPT-NEXT:    [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[OPTTMP11]])
+; OPT-NEXT:    [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
+; OPT-NEXT:    [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
+; OPT-NEXT:    br i1 [[TMP1]], label [[BB12:%.*]], label [[BB22:%.*]]
+; OPT:       bb12:
+; OPT-NEXT:    br label [[NODEBLOCK7:%.*]]
+; OPT:       NodeBlock7:
+; OPT-NEXT:    [[PIVOT8:%.*]] = icmp slt i64 undef, 1
+; OPT-NEXT:    br i1 [[PIVOT8]], label [[LEAFBLOCK3:%.*]], label [[LEAFBLOCK5:%.*]]
+; OPT:       LeafBlock5:
+; OPT-NEXT:    [[SWITCHLEAF6:%.*]] = icmp eq i64 undef, 1
+; OPT-NEXT:    br i1 [[SWITCHLEAF6]], label [[BB14:%.*]], label [[BB15:%.*]]
+; OPT:       LeafBlock3:
+; OPT-NEXT:    [[SWITCHLEAF4:%.*]] = icmp eq i64 undef, 0
+; OPT-NEXT:    br i1 [[SWITCHLEAF4]], label [[BB13:%.*]], label [[BB15]]
+; OPT:       bb13:
+; OPT-NEXT:    unreachable
+; OPT:       bb14:
+; OPT-NEXT:    unreachable
+; OPT:       bb15:
+; OPT-NEXT:    br label [[NODEBLOCK13:%.*]]
+; OPT:       NodeBlock13:
+; OPT-NEXT:    [[PIVOT14:%.*]] = icmp slt i64 undef, 1
+; OPT-NEXT:    br i1 [[PIVOT14]], label [[LEAFBLOCK9:%.*]], label [[LEAFBLOCK11:%.*]]
+; OPT:       LeafBlock11:
+; OPT-NEXT:    [[SWITCHLEAF12:%.*]] = icmp eq i64 undef, 1
+; OPT-NEXT:    br i1 [[SWITCHLEAF12]], label [[BB17:%.*]], label [[BB18:%.*]]
+; OPT:       LeafBlock9:
+; OPT-NEXT:    [[SWITCHLEAF10:%.*]] = icmp eq i64 undef, 0
+; OPT-NEXT:    br i1 [[SWITCHLEAF10]], label [[BB16:%.*]], label [[BB18]]
+; OPT:       bb16:
+; OPT-NEXT:    unreachable
+; OPT:       bb17:
+; OPT-NEXT:    unreachable
+; OPT:       bb18:
+; OPT-NEXT:    [[OPTTMP19:%.*]] = call contract noundef double @wombat()
+; OPT-NEXT:    [[OPTTMP20:%.*]] = fcmp contract une double [[OPTTMP19]], 0.000000e+00
+; OPT-NEXT:    [[TMP3:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[OPTTMP20]])
+; OPT-NEXT:    [[TMP4:%.*]] = extractvalue { i1, i64 } [[TMP3]], 0
+; OPT-NEXT:    [[TMP5:%.*]] = extractvalue { i1, i64 } [[TMP3]], 1
+; OPT-NEXT:    br i1 [[TMP4]], label [[BB21:%.*]], label [[BB18_BB22_CRIT_EDGE:%.*]]
+; OPT:       bb18.bb22_crit_edge:
+; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP5]])
+; OPT-NEXT:    br label [[BB22]]
+; OPT:       bb21:
+; OPT-NEXT:    unreachable
+; OPT:       bb22:
+; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
+; OPT-NEXT:    call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* null)
+; OPT-NEXT:    br label [[BB26:%.*]]
+; OPT:       bb23:
+; OPT-NEXT:    [[OPTTMP24:%.*]] = icmp ult i32 [[OPTTMP5]], [[OPTTMP4]]
+; OPT-NEXT:    br i1 [[OPTTMP24]], label [[BB25:%.*]], label [[BB26]]
+; OPT:       bb25:
+; OPT-NEXT:    unreachable
+; OPT:       bb26:
+; OPT-NEXT:    ret void
+;
+bb:
+  %tmp = sub i64 %arg1, 0
+  %tmp3 = tail call i64 @llvm.smin.i64(i64 %tmp, i64 256)
+  %tmp4 = trunc i64 %tmp3 to i32
+  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.x()
+  br i1 undef, label %bb6, label %bb23
+
+bb6:                                              ; preds = %bb
+  switch i64 undef, label %bb9 [
+    i64 0, label %bb7
+    i64 1, label %bb8
+  ]
+
+bb7:                                              ; preds = %bb6
+  unreachable
+
+bb8:                                              ; preds = %bb6
+  unreachable
+
+bb9:                                              ; preds = %bb6
+  %tmp10 = call contract noundef double @wombat()
+  %tmp11 = fcmp contract une double %tmp10, 0.000000e+00
+  br i1 %tmp11, label %bb12, label %bb22
+
+bb12:                                             ; preds = %bb9
+  switch i64 undef, label %bb15 [
+    i64 0, label %bb13
+    i64 1, label %bb14
+  ]
+
+bb13:                                             ; preds = %bb12
+  unreachable
+
+bb14:                                             ; preds = %bb12
+  unreachable
+
+bb15:                                             ; preds = %bb12
+  switch i64 undef, label %bb18 [
+    i64 0, label %bb16
+    i64 1, label %bb17
+  ]
+
+bb16:                                             ; preds = %bb15
+  unreachable
+
+bb17:                                             ; preds = %bb15
+  unreachable
+
+bb18:                                             ; preds = %bb15
+  %tmp19 = call contract noundef double @wombat()
+  %tmp20 = fcmp contract une double %tmp19, 0.000000e+00
+  br i1 %tmp20, label %bb21, label %bb22
+
+bb21:                                             ; preds = %bb18
+  unreachable
+
+bb22:                                             ; preds = %bb18, %bb9
+  call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* null)
+  br label %bb26
+
+bb23:                                             ; preds = %bb
+  %tmp24 = icmp ult i32 %tmp5, %tmp4
+  br i1 %tmp24, label %bb25, label %bb26
+
+bb25:                                             ; preds = %bb23
+  unreachable
+
+bb26:                                             ; preds = %bb23, %bb22
+  ret void
+}
+
+declare i64 @llvm.smin.i64(i64, i64)
+declare i32 @llvm.amdgcn.workitem.id.x()
+declare void @llvm.lifetime.end.p5i8(i64 immarg, i8 addrspace(5)* nocapture)
+
+declare double @wombat()