Index: llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -67,7 +67,7 @@ Value *popSaved(); - void push(BasicBlock *BB, Value *Saved); + void push(BasicBlock *BB, Value *Saved, unsigned Bury = 0); bool isElse(PHINode *Phi); @@ -81,7 +81,7 @@ Value * handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L, - BranchInst *Term); + BranchInst *Term); bool handleLoop(BranchInst *Term); @@ -161,8 +161,8 @@ } /// Push a BB and saved value to the control flow stack -void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) { - Stack.push_back(std::make_pair(BB, Saved)); +void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved, unsigned Bury) { + Stack.insert(Stack.end() - Bury, std::make_pair(BB, Saved)); } /// Can the condition represented by this PHI node treated like @@ -208,7 +208,14 @@ Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term); Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term)); - push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term)); + + // Sometimes the "then" block can already be on the stack. This happens + // when multiple conditional branches jump to the same basic block. + // In such cases, bury the "else" block under it instead of adding it on + // top of the stack. That way, we can still pop the blocks correctly + // when finishing the traversal. + push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term), + isTopOfStack(Term->getSuccessor(0)) ? 1 : 0); return true; } @@ -348,7 +355,10 @@ BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()); if (!Term || Term->isUnconditional()) { - if (isTopOfStack(BB)) + // We use 'while' here because we may need to close multiple + // control flows, e.g. when two different branches use the same + // then/else block, we'd have pushed that block twice on the stack. 
+ while (isTopOfStack(BB)) Changed |= closeControlFlow(BB); continue; Index: llvm/test/CodeGen/AMDGPU/si-annotate-control-flow-condition-common-blocks.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/si-annotate-control-flow-condition-common-blocks.ll @@ -0,0 +1,629 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name OPT +; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s +; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs %s + +; Tests that the SIAnnotateControlFlow pass can correctly handle CFGs where multiple +; conditional branches have the same destination basic block. +; +; In such cases, the BB may be pushed multiple times onto the stack, and we must be able +; to pop all instances of it. + +; Two conditionals use bb22 as the then block. +define amdgpu_kernel void @alpha(i64 %arg1, i64 %arg2) { +; OPT-LABEL: @alpha( +; OPT-NEXT: bb: +; OPT-NEXT: [[TMP:%.*]] = sub i64 [[ARG1:%.*]], 0 +; OPT-NEXT: [[OPTTMP3:%.*]] = tail call i64 @llvm.smin.i64(i64 [[TMP]], i64 256) +; OPT-NEXT: [[OPTTMP4:%.*]] = trunc i64 [[OPTTMP3]] to i32 +; OPT-NEXT: [[OPTTMP5:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; OPT-NEXT: br i1 undef, label [[BB6:%.*]], label [[BB23:%.*]] +; OPT: bb6: +; OPT-NEXT: br label [[NODEBLOCK:%.*]] +; OPT: NodeBlock: +; OPT-NEXT: [[PIVOT:%.*]] = icmp slt i64 undef, 1 +; OPT-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK:%.*]], label [[LEAFBLOCK1:%.*]] +; OPT: LeafBlock1: +; OPT-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i64 undef, 1 +; OPT-NEXT: br i1 [[SWITCHLEAF2]], label [[BB8:%.*]], label [[BB9:%.*]] +; OPT: LeafBlock: +; OPT-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i64 undef, 0 +; OPT-NEXT: br i1 [[SWITCHLEAF]], label [[BB7:%.*]], label [[BB9]] +; OPT: bb7: +; OPT-NEXT: unreachable +; OPT: bb8: +; OPT-NEXT: unreachable +; OPT: bb9: +; OPT-NEXT: [[OPTTMP10:%.*]] = call 
contract noundef double @wombat() +; OPT-NEXT: [[OPTTMP11:%.*]] = fcmp contract une double [[OPTTMP10]], 0.000000e+00 +; OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[OPTTMP11]]) +; OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 +; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 +; OPT-NEXT: br i1 [[TMP1]], label [[BB22:%.*]], label [[BB12:%.*]] +; OPT: bb12: +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) +; OPT-NEXT: br label [[NODEBLOCK7:%.*]] +; OPT: NodeBlock7: +; OPT-NEXT: [[PIVOT8:%.*]] = icmp slt i64 undef, 1 +; OPT-NEXT: br i1 [[PIVOT8]], label [[LEAFBLOCK3:%.*]], label [[LEAFBLOCK5:%.*]] +; OPT: LeafBlock5: +; OPT-NEXT: [[SWITCHLEAF6:%.*]] = icmp eq i64 undef, 1 +; OPT-NEXT: br i1 [[SWITCHLEAF6]], label [[BB14:%.*]], label [[BB15:%.*]] +; OPT: LeafBlock3: +; OPT-NEXT: [[SWITCHLEAF4:%.*]] = icmp eq i64 undef, 0 +; OPT-NEXT: br i1 [[SWITCHLEAF4]], label [[BB13:%.*]], label [[BB15]] +; OPT: bb13: +; OPT-NEXT: unreachable +; OPT: bb14: +; OPT-NEXT: unreachable +; OPT: bb15: +; OPT-NEXT: br label [[NODEBLOCK13:%.*]] +; OPT: NodeBlock13: +; OPT-NEXT: [[PIVOT14:%.*]] = icmp slt i64 undef, 1 +; OPT-NEXT: br i1 [[PIVOT14]], label [[LEAFBLOCK9:%.*]], label [[LEAFBLOCK11:%.*]] +; OPT: LeafBlock11: +; OPT-NEXT: [[SWITCHLEAF12:%.*]] = icmp eq i64 undef, 1 +; OPT-NEXT: br i1 [[SWITCHLEAF12]], label [[BB17:%.*]], label [[BB18:%.*]] +; OPT: LeafBlock9: +; OPT-NEXT: [[SWITCHLEAF10:%.*]] = icmp eq i64 undef, 0 +; OPT-NEXT: br i1 [[SWITCHLEAF10]], label [[BB16:%.*]], label [[BB18]] +; OPT: bb16: +; OPT-NEXT: unreachable +; OPT: bb17: +; OPT-NEXT: unreachable +; OPT: bb18: +; OPT-NEXT: [[OPTTMP19:%.*]] = call contract noundef double @wombat() +; OPT-NEXT: [[OPTTMP20:%.*]] = fcmp contract une double [[OPTTMP19]], 0.000000e+00 +; OPT-NEXT: [[TMP3:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[OPTTMP20]]) +; OPT-NEXT: [[TMP4:%.*]] = extractvalue { i1, i64 } [[TMP3]], 0 +; OPT-NEXT: [[TMP5:%.*]] = extractvalue { 
i1, i64 } [[TMP3]], 1 +; OPT-NEXT: br i1 [[TMP4]], label [[BB22]], label [[BB21:%.*]] +; OPT: bb21: +; OPT-NEXT: unreachable +; OPT: bb22: +; OPT-NEXT: call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* null) +; OPT-NEXT: br label [[BB26:%.*]] +; OPT: bb23: +; OPT-NEXT: [[OPTTMP24:%.*]] = icmp ult i32 [[OPTTMP5]], [[OPTTMP4]] +; OPT-NEXT: br i1 [[OPTTMP24]], label [[BB25:%.*]], label [[BB26]] +; OPT: bb25: +; OPT-NEXT: unreachable +; OPT: bb26: +; OPT-NEXT: ret void +; +bb: + %tmp = sub i64 %arg1, 0 + %tmp3 = tail call i64 @llvm.smin.i64(i64 %tmp, i64 256) + %tmp4 = trunc i64 %tmp3 to i32 + %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.x() + br i1 undef, label %bb6, label %bb23 + +bb6: ; preds = %bb + switch i64 undef, label %bb9 [ + i64 0, label %bb7 + i64 1, label %bb8 + ] + +bb7: ; preds = %bb6 + unreachable + +bb8: ; preds = %bb6 + unreachable + +bb9: ; preds = %bb6 + %tmp10 = call contract noundef double @wombat() + %tmp11 = fcmp contract une double %tmp10, 0.000000e+00 + br i1 %tmp11, label %bb22, label %bb12 + +bb12: ; preds = %bb9 + switch i64 undef, label %bb15 [ + i64 0, label %bb13 + i64 1, label %bb14 + ] + +bb13: ; preds = %bb12 + unreachable + +bb14: ; preds = %bb12 + unreachable + +bb15: ; preds = %bb12 + switch i64 undef, label %bb18 [ + i64 0, label %bb16 + i64 1, label %bb17 + ] + +bb16: ; preds = %bb15 + unreachable + +bb17: ; preds = %bb15 + unreachable + +bb18: ; preds = %bb15 + %tmp19 = call contract noundef double @wombat() + %tmp20 = fcmp contract une double %tmp19, 0.000000e+00 + br i1 %tmp20, label %bb22, label %bb21 + +bb21: ; preds = %bb18 + unreachable + +bb22: ; preds = %bb18, %bb9 + call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* null) + br label %bb26 + +bb23: ; preds = %bb + %tmp24 = icmp ult i32 %tmp5, %tmp4 + br i1 %tmp24, label %bb25, label %bb26 + +bb25: ; preds = %bb23 + unreachable + +bb26: ; preds = %bb23, %bb22 + ret void +} + +; Two conditionals use bb22 as the else block. 
+define amdgpu_kernel void @beta(i64 %arg1, i64 %arg2) { +; OPT-LABEL: @beta( +; OPT-NEXT: bb: +; OPT-NEXT: [[TMP:%.*]] = sub i64 [[ARG1:%.*]], 0 +; OPT-NEXT: [[OPTTMP3:%.*]] = tail call i64 @llvm.smin.i64(i64 [[TMP]], i64 256) +; OPT-NEXT: [[OPTTMP4:%.*]] = trunc i64 [[OPTTMP3]] to i32 +; OPT-NEXT: [[OPTTMP5:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; OPT-NEXT: br i1 undef, label [[BB6:%.*]], label [[BB23:%.*]] +; OPT: bb6: +; OPT-NEXT: br label [[NODEBLOCK:%.*]] +; OPT: NodeBlock: +; OPT-NEXT: [[PIVOT:%.*]] = icmp slt i64 undef, 1 +; OPT-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK:%.*]], label [[LEAFBLOCK1:%.*]] +; OPT: LeafBlock1: +; OPT-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i64 undef, 1 +; OPT-NEXT: br i1 [[SWITCHLEAF2]], label [[BB8:%.*]], label [[BB9:%.*]] +; OPT: LeafBlock: +; OPT-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i64 undef, 0 +; OPT-NEXT: br i1 [[SWITCHLEAF]], label [[BB7:%.*]], label [[BB9]] +; OPT: bb7: +; OPT-NEXT: unreachable +; OPT: bb8: +; OPT-NEXT: unreachable +; OPT: bb9: +; OPT-NEXT: [[OPTTMP10:%.*]] = call contract noundef double @wombat() +; OPT-NEXT: [[OPTTMP11:%.*]] = fcmp contract une double [[OPTTMP10]], 0.000000e+00 +; OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[OPTTMP11]]) +; OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 +; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 +; OPT-NEXT: br i1 [[TMP1]], label [[BB12:%.*]], label [[BB22:%.*]] +; OPT: bb12: +; OPT-NEXT: br label [[NODEBLOCK7:%.*]] +; OPT: NodeBlock7: +; OPT-NEXT: [[PIVOT8:%.*]] = icmp slt i64 undef, 1 +; OPT-NEXT: br i1 [[PIVOT8]], label [[LEAFBLOCK3:%.*]], label [[LEAFBLOCK5:%.*]] +; OPT: LeafBlock5: +; OPT-NEXT: [[SWITCHLEAF6:%.*]] = icmp eq i64 undef, 1 +; OPT-NEXT: br i1 [[SWITCHLEAF6]], label [[BB14:%.*]], label [[BB15:%.*]] +; OPT: LeafBlock3: +; OPT-NEXT: [[SWITCHLEAF4:%.*]] = icmp eq i64 undef, 0 +; OPT-NEXT: br i1 [[SWITCHLEAF4]], label [[BB13:%.*]], label [[BB15]] +; OPT: bb13: +; OPT-NEXT: unreachable +; OPT: 
bb14: +; OPT-NEXT: unreachable +; OPT: bb15: +; OPT-NEXT: br label [[NODEBLOCK13:%.*]] +; OPT: NodeBlock13: +; OPT-NEXT: [[PIVOT14:%.*]] = icmp slt i64 undef, 1 +; OPT-NEXT: br i1 [[PIVOT14]], label [[LEAFBLOCK9:%.*]], label [[LEAFBLOCK11:%.*]] +; OPT: LeafBlock11: +; OPT-NEXT: [[SWITCHLEAF12:%.*]] = icmp eq i64 undef, 1 +; OPT-NEXT: br i1 [[SWITCHLEAF12]], label [[BB17:%.*]], label [[BB18:%.*]] +; OPT: LeafBlock9: +; OPT-NEXT: [[SWITCHLEAF10:%.*]] = icmp eq i64 undef, 0 +; OPT-NEXT: br i1 [[SWITCHLEAF10]], label [[BB16:%.*]], label [[BB18]] +; OPT: bb16: +; OPT-NEXT: unreachable +; OPT: bb17: +; OPT-NEXT: unreachable +; OPT: bb18: +; OPT-NEXT: [[OPTTMP19:%.*]] = call contract noundef double @wombat() +; OPT-NEXT: [[OPTTMP20:%.*]] = fcmp contract une double [[OPTTMP19]], 0.000000e+00 +; OPT-NEXT: [[TMP3:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[OPTTMP20]]) +; OPT-NEXT: [[TMP4:%.*]] = extractvalue { i1, i64 } [[TMP3]], 0 +; OPT-NEXT: [[TMP5:%.*]] = extractvalue { i1, i64 } [[TMP3]], 1 +; OPT-NEXT: br i1 [[TMP4]], label [[BB21:%.*]], label [[BB18_BB22_CRIT_EDGE:%.*]] +; OPT: bb18.bb22_crit_edge: +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP5]]) +; OPT-NEXT: br label [[BB22]] +; OPT: bb21: +; OPT-NEXT: unreachable +; OPT: bb22: +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) +; OPT-NEXT: call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* null) +; OPT-NEXT: br label [[BB26:%.*]] +; OPT: bb23: +; OPT-NEXT: [[OPTTMP24:%.*]] = icmp ult i32 [[OPTTMP5]], [[OPTTMP4]] +; OPT-NEXT: br i1 [[OPTTMP24]], label [[BB25:%.*]], label [[BB26]] +; OPT: bb25: +; OPT-NEXT: unreachable +; OPT: bb26: +; OPT-NEXT: ret void +; +bb: + %tmp = sub i64 %arg1, 0 + %tmp3 = tail call i64 @llvm.smin.i64(i64 %tmp, i64 256) + %tmp4 = trunc i64 %tmp3 to i32 + %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.x() + br i1 undef, label %bb6, label %bb23 + +bb6: ; preds = %bb + switch i64 undef, label %bb9 [ + i64 0, label %bb7 + i64 1, label %bb8 + ] + +bb7: ; preds = 
%bb6 + unreachable + +bb8: ; preds = %bb6 + unreachable + +bb9: ; preds = %bb6 + %tmp10 = call contract noundef double @wombat() + %tmp11 = fcmp contract une double %tmp10, 0.000000e+00 + br i1 %tmp11, label %bb12, label %bb22 + +bb12: ; preds = %bb9 + switch i64 undef, label %bb15 [ + i64 0, label %bb13 + i64 1, label %bb14 + ] + +bb13: ; preds = %bb12 + unreachable + +bb14: ; preds = %bb12 + unreachable + +bb15: ; preds = %bb12 + switch i64 undef, label %bb18 [ + i64 0, label %bb16 + i64 1, label %bb17 + ] + +bb16: ; preds = %bb15 + unreachable + +bb17: ; preds = %bb15 + unreachable + +bb18: ; preds = %bb15 + %tmp19 = call contract noundef double @wombat() + %tmp20 = fcmp contract une double %tmp19, 0.000000e+00 + br i1 %tmp20, label %bb21, label %bb22 + +bb21: ; preds = %bb18 + unreachable + +bb22: ; preds = %bb18, %bb9 + call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* null) + br label %bb26 + +bb23: ; preds = %bb + %tmp24 = icmp ult i32 %tmp5, %tmp4 + br i1 %tmp24, label %bb25, label %bb26 + +bb25: ; preds = %bb23 + unreachable + +bb26: ; preds = %bb23, %bb22 + ret void +} + +; First conditional uses bb22 as the "then" block, then second uses +; it as the "else" block. 
+define amdgpu_kernel void @charlie(i64 %arg1, i64 %arg2) { +; OPT-LABEL: @charlie( +; OPT-NEXT: bb: +; OPT-NEXT: [[TMP:%.*]] = sub i64 [[ARG1:%.*]], 0 +; OPT-NEXT: [[OPTTMP3:%.*]] = tail call i64 @llvm.smin.i64(i64 [[TMP]], i64 256) +; OPT-NEXT: [[OPTTMP4:%.*]] = trunc i64 [[OPTTMP3]] to i32 +; OPT-NEXT: [[OPTTMP5:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; OPT-NEXT: br i1 undef, label [[BB6:%.*]], label [[BB23:%.*]] +; OPT: bb6: +; OPT-NEXT: br label [[NODEBLOCK:%.*]] +; OPT: NodeBlock: +; OPT-NEXT: [[PIVOT:%.*]] = icmp slt i64 undef, 1 +; OPT-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK:%.*]], label [[LEAFBLOCK1:%.*]] +; OPT: LeafBlock1: +; OPT-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i64 undef, 1 +; OPT-NEXT: br i1 [[SWITCHLEAF2]], label [[BB8:%.*]], label [[BB9:%.*]] +; OPT: LeafBlock: +; OPT-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i64 undef, 0 +; OPT-NEXT: br i1 [[SWITCHLEAF]], label [[BB7:%.*]], label [[BB9]] +; OPT: bb7: +; OPT-NEXT: unreachable +; OPT: bb8: +; OPT-NEXT: unreachable +; OPT: bb9: +; OPT-NEXT: [[OPTTMP10:%.*]] = call contract noundef double @wombat() +; OPT-NEXT: [[OPTTMP11:%.*]] = fcmp contract une double [[OPTTMP10]], 0.000000e+00 +; OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[OPTTMP11]]) +; OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 +; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 +; OPT-NEXT: br i1 [[TMP1]], label [[BB22:%.*]], label [[BB12:%.*]] +; OPT: bb12: +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) +; OPT-NEXT: br label [[NODEBLOCK7:%.*]] +; OPT: NodeBlock7: +; OPT-NEXT: [[PIVOT8:%.*]] = icmp slt i64 undef, 1 +; OPT-NEXT: br i1 [[PIVOT8]], label [[LEAFBLOCK3:%.*]], label [[LEAFBLOCK5:%.*]] +; OPT: LeafBlock5: +; OPT-NEXT: [[SWITCHLEAF6:%.*]] = icmp eq i64 undef, 1 +; OPT-NEXT: br i1 [[SWITCHLEAF6]], label [[BB14:%.*]], label [[BB15:%.*]] +; OPT: LeafBlock3: +; OPT-NEXT: [[SWITCHLEAF4:%.*]] = icmp eq i64 undef, 0 +; OPT-NEXT: br i1 [[SWITCHLEAF4]], label 
[[BB13:%.*]], label [[BB15]] +; OPT: bb13: +; OPT-NEXT: unreachable +; OPT: bb14: +; OPT-NEXT: unreachable +; OPT: bb15: +; OPT-NEXT: br label [[NODEBLOCK13:%.*]] +; OPT: NodeBlock13: +; OPT-NEXT: [[PIVOT14:%.*]] = icmp slt i64 undef, 1 +; OPT-NEXT: br i1 [[PIVOT14]], label [[LEAFBLOCK9:%.*]], label [[LEAFBLOCK11:%.*]] +; OPT: LeafBlock11: +; OPT-NEXT: [[SWITCHLEAF12:%.*]] = icmp eq i64 undef, 1 +; OPT-NEXT: br i1 [[SWITCHLEAF12]], label [[BB17:%.*]], label [[BB18:%.*]] +; OPT: LeafBlock9: +; OPT-NEXT: [[SWITCHLEAF10:%.*]] = icmp eq i64 undef, 0 +; OPT-NEXT: br i1 [[SWITCHLEAF10]], label [[BB16:%.*]], label [[BB18]] +; OPT: bb16: +; OPT-NEXT: unreachable +; OPT: bb17: +; OPT-NEXT: unreachable +; OPT: bb18: +; OPT-NEXT: [[OPTTMP19:%.*]] = call contract noundef double @wombat() +; OPT-NEXT: [[OPTTMP20:%.*]] = fcmp contract une double [[OPTTMP19]], 0.000000e+00 +; OPT-NEXT: br i1 [[OPTTMP20]], label [[BB21:%.*]], label [[BB22]] +; OPT: bb21: +; OPT-NEXT: unreachable +; OPT: bb22: +; OPT-NEXT: call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* null) +; OPT-NEXT: br label [[BB26:%.*]] +; OPT: bb23: +; OPT-NEXT: [[OPTTMP24:%.*]] = icmp ult i32 [[OPTTMP5]], [[OPTTMP4]] +; OPT-NEXT: br i1 [[OPTTMP24]], label [[BB25:%.*]], label [[BB26]] +; OPT: bb25: +; OPT-NEXT: unreachable +; OPT: bb26: +; OPT-NEXT: ret void +; +bb: + %tmp = sub i64 %arg1, 0 + %tmp3 = tail call i64 @llvm.smin.i64(i64 %tmp, i64 256) + %tmp4 = trunc i64 %tmp3 to i32 + %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.x() + br i1 undef, label %bb6, label %bb23 + +bb6: ; preds = %bb + switch i64 undef, label %bb9 [ + i64 0, label %bb7 + i64 1, label %bb8 + ] + +bb7: ; preds = %bb6 + unreachable + +bb8: ; preds = %bb6 + unreachable + +bb9: ; preds = %bb6 + %tmp10 = call contract noundef double @wombat() + %tmp11 = fcmp contract une double %tmp10, 0.000000e+00 + br i1 %tmp11, label %bb22, label %bb12 + +bb12: ; preds = %bb9 + switch i64 undef, label %bb15 [ + i64 0, label %bb13 + i64 1, label %bb14 + ] + 
+bb13: ; preds = %bb12 + unreachable + +bb14: ; preds = %bb12 + unreachable + +bb15: ; preds = %bb12 + switch i64 undef, label %bb18 [ + i64 0, label %bb16 + i64 1, label %bb17 + ] + +bb16: ; preds = %bb15 + unreachable + +bb17: ; preds = %bb15 + unreachable + +bb18: ; preds = %bb15 + %tmp19 = call contract noundef double @wombat() + %tmp20 = fcmp contract une double %tmp19, 0.000000e+00 + br i1 %tmp20, label %bb21, label %bb22 + +bb21: ; preds = %bb18 + unreachable + +bb22: ; preds = %bb18, %bb9 + call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* null) + br label %bb26 + +bb23: ; preds = %bb + %tmp24 = icmp ult i32 %tmp5, %tmp4 + br i1 %tmp24, label %bb25, label %bb26 + +bb25: ; preds = %bb23 + unreachable + +bb26: ; preds = %bb23, %bb22 + ret void +} + +; First conditional uses bb22 as the "else" block, then second uses +; it as the "then" block. +define amdgpu_kernel void @delta(i64 %arg1, i64 %arg2) { +; OPT-LABEL: @delta( +; OPT-NEXT: bb: +; OPT-NEXT: [[TMP:%.*]] = sub i64 [[ARG1:%.*]], 0 +; OPT-NEXT: [[OPTTMP3:%.*]] = tail call i64 @llvm.smin.i64(i64 [[TMP]], i64 256) +; OPT-NEXT: [[OPTTMP4:%.*]] = trunc i64 [[OPTTMP3]] to i32 +; OPT-NEXT: [[OPTTMP5:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; OPT-NEXT: br i1 undef, label [[BB6:%.*]], label [[BB23:%.*]] +; OPT: bb6: +; OPT-NEXT: br label [[NODEBLOCK:%.*]] +; OPT: NodeBlock: +; OPT-NEXT: [[PIVOT:%.*]] = icmp slt i64 undef, 1 +; OPT-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK:%.*]], label [[LEAFBLOCK1:%.*]] +; OPT: LeafBlock1: +; OPT-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i64 undef, 1 +; OPT-NEXT: br i1 [[SWITCHLEAF2]], label [[BB8:%.*]], label [[BB9:%.*]] +; OPT: LeafBlock: +; OPT-NEXT: [[SWITCHLEAF:%.*]] = icmp eq i64 undef, 0 +; OPT-NEXT: br i1 [[SWITCHLEAF]], label [[BB7:%.*]], label [[BB9]] +; OPT: bb7: +; OPT-NEXT: unreachable +; OPT: bb8: +; OPT-NEXT: unreachable +; OPT: bb9: +; OPT-NEXT: [[OPTTMP10:%.*]] = call contract noundef double @wombat() +; OPT-NEXT: [[OPTTMP11:%.*]] = fcmp contract 
une double [[OPTTMP10]], 0.000000e+00 +; OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[OPTTMP11]]) +; OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 +; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 +; OPT-NEXT: br i1 [[TMP1]], label [[BB12:%.*]], label [[BB22:%.*]] +; OPT: bb12: +; OPT-NEXT: br label [[NODEBLOCK7:%.*]] +; OPT: NodeBlock7: +; OPT-NEXT: [[PIVOT8:%.*]] = icmp slt i64 undef, 1 +; OPT-NEXT: br i1 [[PIVOT8]], label [[LEAFBLOCK3:%.*]], label [[LEAFBLOCK5:%.*]] +; OPT: LeafBlock5: +; OPT-NEXT: [[SWITCHLEAF6:%.*]] = icmp eq i64 undef, 1 +; OPT-NEXT: br i1 [[SWITCHLEAF6]], label [[BB14:%.*]], label [[BB15:%.*]] +; OPT: LeafBlock3: +; OPT-NEXT: [[SWITCHLEAF4:%.*]] = icmp eq i64 undef, 0 +; OPT-NEXT: br i1 [[SWITCHLEAF4]], label [[BB13:%.*]], label [[BB15]] +; OPT: bb13: +; OPT-NEXT: unreachable +; OPT: bb14: +; OPT-NEXT: unreachable +; OPT: bb15: +; OPT-NEXT: br label [[NODEBLOCK13:%.*]] +; OPT: NodeBlock13: +; OPT-NEXT: [[PIVOT14:%.*]] = icmp slt i64 undef, 1 +; OPT-NEXT: br i1 [[PIVOT14]], label [[LEAFBLOCK9:%.*]], label [[LEAFBLOCK11:%.*]] +; OPT: LeafBlock11: +; OPT-NEXT: [[SWITCHLEAF12:%.*]] = icmp eq i64 undef, 1 +; OPT-NEXT: br i1 [[SWITCHLEAF12]], label [[BB17:%.*]], label [[BB18:%.*]] +; OPT: LeafBlock9: +; OPT-NEXT: [[SWITCHLEAF10:%.*]] = icmp eq i64 undef, 0 +; OPT-NEXT: br i1 [[SWITCHLEAF10]], label [[BB16:%.*]], label [[BB18]] +; OPT: bb16: +; OPT-NEXT: unreachable +; OPT: bb17: +; OPT-NEXT: unreachable +; OPT: bb18: +; OPT-NEXT: [[OPTTMP19:%.*]] = call contract noundef double @wombat() +; OPT-NEXT: [[OPTTMP20:%.*]] = fcmp contract une double [[OPTTMP19]], 0.000000e+00 +; OPT-NEXT: [[TMP3:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[OPTTMP20]]) +; OPT-NEXT: [[TMP4:%.*]] = extractvalue { i1, i64 } [[TMP3]], 0 +; OPT-NEXT: [[TMP5:%.*]] = extractvalue { i1, i64 } [[TMP3]], 1 +; OPT-NEXT: br i1 [[TMP4]], label [[BB22]], label [[BB21:%.*]] +; OPT: bb21: +; OPT-NEXT: unreachable +; OPT: bb22: +; 
OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) +; OPT-NEXT: call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* null) +; OPT-NEXT: br label [[BB26:%.*]] +; OPT: bb23: +; OPT-NEXT: [[OPTTMP24:%.*]] = icmp ult i32 [[OPTTMP5]], [[OPTTMP4]] +; OPT-NEXT: br i1 [[OPTTMP24]], label [[BB25:%.*]], label [[BB26]] +; OPT: bb25: +; OPT-NEXT: unreachable +; OPT: bb26: +; OPT-NEXT: ret void +; +bb: + %tmp = sub i64 %arg1, 0 + %tmp3 = tail call i64 @llvm.smin.i64(i64 %tmp, i64 256) + %tmp4 = trunc i64 %tmp3 to i32 + %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.x() + br i1 undef, label %bb6, label %bb23 + +bb6: ; preds = %bb + switch i64 undef, label %bb9 [ + i64 0, label %bb7 + i64 1, label %bb8 + ] + +bb7: ; preds = %bb6 + unreachable + +bb8: ; preds = %bb6 + unreachable + +bb9: ; preds = %bb6 + %tmp10 = call contract noundef double @wombat() + %tmp11 = fcmp contract une double %tmp10, 0.000000e+00 + br i1 %tmp11, label %bb12, label %bb22 + +bb12: ; preds = %bb9 + switch i64 undef, label %bb15 [ + i64 0, label %bb13 + i64 1, label %bb14 + ] + +bb13: ; preds = %bb12 + unreachable + +bb14: ; preds = %bb12 + unreachable + +bb15: ; preds = %bb12 + switch i64 undef, label %bb18 [ + i64 0, label %bb16 + i64 1, label %bb17 + ] + +bb16: ; preds = %bb15 + unreachable + +bb17: ; preds = %bb15 + unreachable + +bb18: ; preds = %bb15 + %tmp19 = call contract noundef double @wombat() + %tmp20 = fcmp contract une double %tmp19, 0.000000e+00 + br i1 %tmp20, label %bb22, label %bb21 + +bb21: ; preds = %bb18 + unreachable + +bb22: ; preds = %bb18, %bb9 + call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* null) + br label %bb26 + +bb23: ; preds = %bb + %tmp24 = icmp ult i32 %tmp5, %tmp4 + br i1 %tmp24, label %bb25, label %bb26 + +bb25: ; preds = %bb23 + unreachable + +bb26: ; preds = %bb23, %bb22 + ret void +} + +declare i64 @llvm.smin.i64(i64, i64) +declare i32 @llvm.amdgcn.workitem.id.x() +declare void @llvm.lifetime.end.p5i8(i64 immarg, i8 addrspace(5)* nocapture) + 
+declare double @wombat()