Index: llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -262,7 +262,23 @@
 
       BasicBlock *From = Phi->getIncomingBlock(i);
       if (From == IDom) {
+        // We're in the following situation:
+        //   IDom/From
+        //      |   \
+        //      |   If-block
+        //      |   /
+        //     Parent
+        // where we want to break out of the loop if the If-block is not taken.
+        // Due to the depth-first traversal, there should be an end.cf
+        // intrinsic in Parent, and we insert an else.break before it.
+        //
+        // Note that the end.cf need not be the first non-phi instruction
+        // of parent, particularly when we're dealing with a multi-level
+        // break, but it should occur within a group of intrinsic calls
+        // at the beginning of the block.
         CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
+        while (OldEnd && OldEnd->getCalledFunction() != EndCf)
+          OldEnd = dyn_cast<CallInst>(OldEnd->getNextNode());
         if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
           Value *Args[] = { OldEnd->getArgOperand(0), NewPhi };
           Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
Index: llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -0,0 +1,41 @@
+; RUN: opt -S -mtriple=amdgcn-- -structurizecfg -si-annotate-control-flow < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}define amdgpu_vs void @main
+; CHECK: main_body:
+; CHECK: LOOP.outer:
+; CHECK: LOOP:
+; CHECK: [[if:%[0-9]+]] = call { i1, i64 } @llvm.amdgcn.if(
+; CHECK: [[if_exec:%[0-9]+]] = extractvalue { i1, i64 } [[if]], 1
+;
+; CHECK: Flow:
+;
+; Ensure two else.break calls, for both the inner and outer loops
+;
+; CHECK: call i64 @llvm.amdgcn.else.break(i64 [[if_exec]],
+; CHECK-NEXT: call i64 @llvm.amdgcn.else.break(i64 [[if_exec]],
+; CHECK-NEXT: call void @llvm.amdgcn.end.cf
+;
+; CHECK: Flow1:
+define amdgpu_vs void @main(<4 x float> %vec, i32 %ub, i32 %cont) {
+main_body:
+  br label %LOOP.outer
+
+LOOP.outer: ; preds = %ENDIF, %main_body
+  %tmp43 = phi i32 [ 0, %main_body ], [ %tmp47, %ENDIF ]
+  br label %LOOP
+
+LOOP: ; preds = %ENDIF, %LOOP.outer
+  %tmp45 = phi i32 [ %tmp43, %LOOP.outer ], [ %tmp47, %ENDIF ]
+  %tmp47 = add i32 %tmp45, 1
+  %tmp48 = icmp slt i32 %tmp45, %ub
+  br i1 %tmp48, label %ENDIF, label %IF
+
+IF: ; preds = %LOOP
+  ret void
+
+ENDIF: ; preds = %LOOP
+  %tmp51 = icmp eq i32 %tmp47, %cont
+  br i1 %tmp51, label %LOOP, label %LOOP.outer
+}
+
+attributes #0 = { nounwind readnone }