Index: lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
===================================================================
--- lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -264,8 +264,17 @@
   Term->setCondition(BoolTrue);
   Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
 
-  for (BasicBlock *Pred : predecessors(Target))
-    Broken->addIncoming(Pred == BB ? Arg : Int64Zero, Pred);
+  for (BasicBlock *Pred : predecessors(Target)) {
+    Value *PHIValue = Int64Zero;
+    if (Pred == BB) // Remember the value of the previous iteration.
+      PHIValue = Arg;
+    // If the backedge from Pred to Target could be executed before the exit
+    // of the loop at BB, it should not reset or change "Broken", which keeps
+    // track of the number of threads exited the loop at BB.
+    else if (L->contains(Pred) && DT->dominates(Pred, BB))
+      PHIValue = Broken;
+    Broken->addIncoming(PHIValue, Pred);
+  }
 
   Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
Index: test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll
@@ -0,0 +1,49 @@
+; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
+
+
+; OPT-LABEL: @multiple_backedges(
+; OPT: loop:                                             ; preds = %loop_end, %loop, %entry
+; OPT-NEXT: %phi.broken1 = phi i64 [ %2, %loop_end ], [ %phi.broken1, %loop ], [ 0, %entry ]
+; OPT-NEXT: %phi.broken = phi i64 [ 0, %loop_end ], [ %0, %loop ], [ 0, %entry ]
+
+
+; OPT: %tmp6 = icmp slt i32 %arg, %tmp5
+; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp6, i64 %phi.broken)
+; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0)
+; OPT-NEXT: br i1 %1, label %loop_end, label %loop
+
+; OPT: loop_end:                                         ; preds = %loop
+; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0)
+; OPT-NEXT: %exit = icmp sgt i32 %tmp5, %tmp2
+; OPT-NEXT: %2 = call i64 @llvm.amdgcn.if.break(i1 %exit, i64 %phi.broken1)
+; OPT-NEXT: %3 = call i1 @llvm.amdgcn.loop(i64 %2)
+; OPT-NEXT: br i1 %3, label %loop_exit, label %loop
+
+; OPT: loop_exit:                                        ; preds = %loop_end
+; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %2)
+define amdgpu_kernel void @multiple_backedges(i32 %arg, i32* %arg1) {
+entry:
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp2 = shl nsw i32 %arg, 1
+  br label %loop
+
+loop:
+  %tmp4 = phi i32 [ 0, %entry ], [ %tmp5, %loop ], [ 0, %loop_end ]
+  %tmp5 = add nsw i32 %tmp4, %tmp
+  %tmp6 = icmp slt i32 %arg, %tmp5
+  br i1 %tmp6, label %loop_end, label %loop
+
+loop_end:
+  %exit = icmp sgt i32 %tmp5, %tmp2
+  br i1 %exit, label %loop_exit, label %loop
+
+loop_exit:
+  %tmp12 = zext i32 %tmp to i64
+  %tmp13 = getelementptr inbounds i32, i32* %arg1, i64 %tmp12
+  %tmp14 = addrspacecast i32* %tmp13 to i32 addrspace(1)*
+  store i32 %tmp5, i32 addrspace(1)* %tmp14, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.x()