diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -195,8 +195,6 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); - if (PDT.getRoots().size() <= 1) - return false; LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>(); @@ -321,7 +319,7 @@ if (ReturningBlocks.empty()) return false; // No blocks return - if (ReturningBlocks.size() == 1) + if (ReturningBlocks.size() == 1 && !InsertExport) return false; // Already has a single return block const TargetTransformInfo &TTI diff --git a/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll --- a/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll @@ -45,6 +45,22 @@ ret void } +; test the case where there's only a kill in an infinite loop +; CHECK-LABEL: only_kill +; CHECK: exp null off, off, off, off done vm +; CHECK-NEXT: s_endpgm +; SIInsertSkips inserts an extra null export here, but it should be harmless. +; CHECK: exp null off, off, off, off done vm +; CHECK-NEXT: s_endpgm +define amdgpu_ps void @only_kill() #0 { +main_body: + br label %loop + +loop: + call void @llvm.amdgcn.kill(i1 false) #3 + br label %loop +} + ; In case there's an epilog, we shouldn't have to do this. ; CHECK-LABEL: return_nonvoid ; CHECK-NOT: exp null off, off, off, off done vm