Index: include/llvm/IR/Function.h
===================================================================
--- include/llvm/IR/Function.h
+++ include/llvm/IR/Function.h
@@ -339,6 +339,9 @@
   void setConvergent() {
     addFnAttr(Attribute::Convergent);
   }
+  void setNotConvergent() {
+    removeFnAttr(Attribute::Convergent);
+  }
 
   /// Determine if the function is known not to recurse, directly or
   /// indirectly.
Index: lib/Transforms/IPO/FunctionAttrs.cpp
===================================================================
--- lib/Transforms/IPO/FunctionAttrs.cpp
+++ lib/Transforms/IPO/FunctionAttrs.cpp
@@ -930,6 +930,35 @@
   return MadeChange;
 }
 
+/// Remove the convergent attribute from the first function in \p SCC when none
+/// of its callees (including intrinsics called from its body) are convergent.
+static bool removeConvergentAttrs(const CallGraphSCC &SCC) {
+  const CallGraphNode *CGN = *SCC.begin();
+  Function *F = CGN->getFunction();
+  if (!F || F->isDeclaration() || !F->isConvergent()) return false;
+
+  // We can't mark F as not-convergent if any of its callees are convergent.
+  if (llvm::any_of(*CGN, [](const CallGraphNode::CallRecord &CR) {
+        Function *Callee = CR.second->getFunction();
+        return !Callee || Callee->isConvergent();
+      }))
+    return false;
+
+  // CGN doesn't contain calls to intrinsics, so we have to iterate over F's
+  // body looking for those.
+  for (auto &BB : *F)
+    for (auto &I : BB) {
+      CallSite CS(cast<Value>(&I));
+      if (!CS) continue;
+      Function *Callee = CS.getCalledFunction();
+      if (Callee && Callee->isIntrinsic() && Callee->isConvergent())
+        return false;
+    }
+
+  F->setNotConvergent();
+  return true;
+}
+
 static bool setDoesNotRecurse(Function &F) {
   if (F.doesNotRecurse())
     return false;
@@ -1006,6 +1035,7 @@
   if (!ExternalNode) {
     Changed |= addNoAliasAttrs(SCCNodes);
     Changed |= addNonNullAttrs(SCCNodes, *TLI);
+    Changed |= removeConvergentAttrs(SCC);
   }
 
   Changed |= addNoRecurseAttrs(SCC);
Index: test/Transforms/FunctionAttrs/convergent.ll
===================================================================
--- /dev/null
+++ test/Transforms/FunctionAttrs/convergent.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -basicaa -functionattrs -rpo-functionattrs -S | FileCheck %s
+
+; CHECK: define i32 @nonleaf() #0
+define i32 @nonleaf() convergent {
+  %a = call i32 @leaf()
+  ret i32 %a
+}
+
+; CHECK: define i32 @leaf() #0
+define i32 @leaf() convergent {
+  ret i32 0
+}
+
+declare i32 @k() convergent
+; CHECK: define i32 @extern() #1
+define i32 @extern() convergent {
+  %a = call i32 @k()
+  ret i32 %a
+}
+
+define i32 @call_extern() convergent {
+  %a = call i32 @extern()
+  ret i32 %a
+}
+
+declare void @llvm.cuda.syncthreads() convergent
+; CHECK: define i32 @intrinsic() #3
+define i32 @intrinsic() convergent {
+  call void @llvm.cuda.syncthreads()
+  ret i32 0
+}
+
+@xyz = global i32 ()* null
+; CHECK: define i32 @functionptr() #1 {
+define i32 @functionptr() convergent {
+  %1 = load i32 ()*, i32 ()** @xyz
+  %2 = call i32 %1()
+  ret i32 %2
+}
+
+; CHECK: attributes #0 = { norecurse readnone }
+; CHECK: attributes #1 = { convergent }
+; CHECK: attributes #3 = { convergent norecurse }