diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h --- a/llvm/include/llvm/ADT/GenericUniformityImpl.h +++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h @@ -946,11 +946,6 @@ if (Cycle->contains(DivTermBlock)) return nullptr; - if (Cycle->isReducible()) { - assert(Cycle->getHeader() == JoinBlock); - return nullptr; - } - const auto *Parent = Cycle->getParentCycle(); while (Parent && !Parent->contains(DivTermBlock)) { // If the join is inside a child, then the parent must be @@ -961,6 +956,11 @@ Parent = Cycle->getParentCycle(); } + if (Cycle->isReducible()) { + assert(Cycle->getHeader() == JoinBlock); + return nullptr; + } + LLVM_DEBUG(dbgs() << "cycle made divergent by external branch\n"); return Cycle; } diff --git a/llvm/test/Analysis/UniformityAnalysis/NVPTX/non-header-join.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/non-header-join.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/non-header-join.ll @@ -0,0 +1,24 @@ +; RUN: opt %s -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +; CHECK: CYCLES ASSSUMED DIVERGENT: +; CHECK-NEXT: depth=1: entries(if.end16 for.cond1) for.body4 + +define void @foo(i1 %b) { +entry: + br i1 %b, label %if.then, label %if.end16 + +if.then: ; preds = %entry + br label %for.cond1 + +for.cond1: ; preds = %if.end16, %for.body4, %if.then + br i1 false, label %for.body4, label %if.end16 + +for.body4: ; preds = %for.cond1 + br label %for.cond1 + +if.end16: ; preds = %for.cond1, %entry + br label %for.cond1 +}