Index: include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- include/llvm/IR/ModuleSummaryIndex.h +++ include/llvm/IR/ModuleSummaryIndex.h @@ -37,7 +37,6 @@ #include #include #include -#include namespace llvm { @@ -161,14 +160,21 @@ } }; -static bool operator==(ValueInfo A, ValueInfo B) { - assert(A.Ref && B.Ref); - return A.getGUID() == B.getGUID(); +inline bool operator==(const ValueInfo &A, const ValueInfo &B) { + assert(A.getRef() && B.getRef() && + "Need ValueInfo with non-null Ref to compare GUIDs"); + return A.getRef() == B.getRef(); } +inline bool operator!=(const ValueInfo &A, const ValueInfo &B) { + assert(A.getRef() && B.getRef() && + "Need ValueInfo with non-null Ref to compare GUIDs"); + return A.getGUID() != B.getGUID(); +} inline bool operator<(const ValueInfo &A, const ValueInfo &B) { - assert(A.Ref && B.Ref && "Need ValueInfo with non-null Ref to compare GUIDs"); + assert(A.getRef() && B.getRef() && + "Need ValueInfo with non-null Ref to compare GUIDs"); return A.getGUID() < B.getGUID(); } @@ -414,7 +420,7 @@ return FunctionSummary( FunctionSummary::GVFlags( GlobalValue::LinkageTypes::AvailableExternallyLinkage, - /*NotEligibleToImport=*/true, /*Live=*/true), + /*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false), 0, FunctionSummary::FFlags{}, std::vector(), std::move(Edges), std::vector(), std::vector(), @@ -545,6 +551,8 @@ TIdInfo = llvm::make_unique(); TIdInfo->TypeTests.push_back(Guid); } + + friend struct GraphTraits; }; template <> struct DenseMapInfo { @@ -743,24 +751,37 @@ // Calculate the callgraph root FunctionSummary calculateCallGraphRoot() { + // Functinos that have a parent will be mark in FunctionHasParent pair. + // Once we've marked all functions, the functions in the map that are false + // have no parent (so they're the roots) std::map FunctionHasParent; std::function discoverNodes = [&](ValueInfo V) { if (!V.getSummaryList().size()) return; // skip external functions that don't have summaries // Mark discovered if we haven't yet - FunctionHasParent.emplace(std::make_pair(V, false)); + auto S = FunctionHasParent.emplace(V, false); + + // Stop if we've already discovered this node + if (!S.second) + return; FunctionSummary *F = dyn_cast(V.getSummaryList().front().get()); assert(F != nullptr && "Expected FunctionSummary node"); for (auto &C : F->calls()) { - auto S = FunctionHasParent.find(C.first); - if (S != FunctionHasParent.end() && S->second) + // Insert node if necessary + auto S = FunctionHasParent.emplace(C.first, true); + + // Skip nodes that we're sure have parents + if (!S.second && S.first->second) continue; - FunctionHasParent[C.first] = true; - discoverNodes(C.first); + + if (S.second) + discoverNodes(C.first); + else + S.first->second = true; } }; @@ -769,7 +790,7 @@ if (!S.second.SummaryList.size() || !isa(S.second.SummaryList.front().get())) continue; - discoverNodes(getValueInfo(S.first)); + discoverNodes(ValueInfo(IsAnalysis, &S)); } std::vector Edges; @@ -779,7 +800,10 @@ continue; // skip over non-root nodes Edges.push_back(std::make_pair(P.first, CalleeInfo{})); } - assert(Edges.size() && "Couldn't find any roots in index callgraph!"); + if (Edges.empty()) { + // Failed to find root - return an empty node + return FunctionSummary::makeDummyFunctionSummary({}); + } auto CallGraphRoot = FunctionSummary::makeDummyFunctionSummary(Edges); return CallGraphRoot; } @@ -807,7 +831,7 @@ return ValueInfo(IsAnalysis, getOrInsertValuePtr(GUID)); } - /// Return a ValueInfo for \p GUID setting value \p Name. + /// Return a ValueInfo for \p GUID setting value \p Name. ValueInfo getOrInsertValueInfo(GlobalValue::GUID GUID, StringRef Name) { assert(!IsAnalysis); auto VP = getOrInsertValuePtr(GUID); @@ -982,6 +1006,75 @@ /// Export summary to dot file for GraphViz. void exportToDot(raw_ostream& OS) const; + + /// Print out strongly connected components for debugging. + void dumpSCCs(raw_ostream &OS); +}; + +/// Gets the first available function summary from a ValueInfo +static FunctionSummary *getFrontFunctionSummary(ValueInfo V) { + FunctionSummary *F; + GlobalValueSummary *Front = V.getSummaryList().front().get(); + switch (Front->getSummaryKind()) { + case GlobalValueSummary::SummaryKind::AliasKind: { + AliasSummary *S = cast(Front); + F = cast(&S->getAliasee()); + break; + } + case GlobalValueSummary::SummaryKind::FunctionKind: { + F = cast(Front); + break; + } + default: + // We can't get a function summary + return NULL; + } + return F; +} + +/// GraphTraits definition to build SCC for the index +template <> struct GraphTraits { + typedef ValueInfo NodeRef; + + static NodeRef valueInfoFromEdge(FunctionSummary::EdgeTy &P) { + return P.first; + } + using ChildIteratorType = + mapped_iterator::iterator, + decltype(&valueInfoFromEdge)>; + + static NodeRef getEntryNode(ValueInfo V) { return V; } + + static ChildIteratorType child_begin(NodeRef N) { + if (!N.getSummaryList().size()) // handle external function + return ChildIteratorType( + FunctionSummary::ExternalNode.CallGraphEdgeList.begin(), + &valueInfoFromEdge); + FunctionSummary *F = getFrontFunctionSummary(N); + return ChildIteratorType(F->CallGraphEdgeList.begin(), &valueInfoFromEdge); + } + + static ChildIteratorType child_end(NodeRef N) { + if (!N.getSummaryList().size()) // handle external function + return ChildIteratorType( + FunctionSummary::ExternalNode.CallGraphEdgeList.end(), + &valueInfoFromEdge); + FunctionSummary *F = getFrontFunctionSummary(N); + return ChildIteratorType(F->CallGraphEdgeList.end(), &valueInfoFromEdge); + } +}; + +template <> +struct GraphTraits : public GraphTraits { + static NodeRef getEntryNode(ModuleSummaryIndex *I) { + std::unique_ptr Root = + make_unique(I->calculateCallGraphRoot()); + GlobalValueSummaryInfo G(I->isPerformingAnalysis()); + G.SummaryList.push_back(std::move(Root)); + static auto P = + GlobalValueSummaryMapTy::value_type(GlobalValue::GUID(0), std::move(G)); + return ValueInfo(I->isPerformingAnalysis(), &P); + } }; } // end namespace llvm Index: include/llvm/Transforms/IPO/FunctionImport.h =================================================================== --- include/llvm/Transforms/IPO/FunctionImport.h +++ include/llvm/Transforms/IPO/FunctionImport.h @@ -114,6 +114,15 @@ ModuleSummaryIndex &Index, const DenseSet &GUIDPreservedSymbols); +/// Propagate function attributes for function summaries along the index's +/// callgraph during thinlink +bool propagateFunctionAttrs(ModuleSummaryIndex &Index); + +/// Inserts the FunctionAttr flags from the Index (ReadNone, ReadOnly, +/// NoRecurse, NoAlias) into \p TheModule. +void thinLTOInsertFunctionAttrsForModule(Module &TheModule, + const GVSummaryMapTy &DefinedGlobals); + /// Compute the set of summaries needed for a ThinLTO backend compilation of /// \p ModulePath. // Index: lib/IR/ModuleSummaryIndex.cpp =================================================================== --- lib/IR/ModuleSummaryIndex.cpp +++ lib/IR/ModuleSummaryIndex.cpp @@ -13,10 +13,15 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; +FunctionSummary FunctionSummary::ExternalNode = + FunctionSummary::makeDummyFunctionSummary({}); + // Collect for the given module the list of function it defines // (GUID -> Summary). void ModuleSummaryIndex::collectDefinedFunctionsForModule( @@ -71,6 +76,26 @@ return false; } +// TODO: write a graphviz dumper for SCCs (see ModuleSummaryIndex::exportToDot) +// then delete this function and update its tests +LLVM_DUMP_METHOD +void ModuleSummaryIndex::dumpSCCs(raw_ostream &O) { + for (scc_iterator I = + scc_begin(this); + !I.isAtEnd(); ++I) { + O << "SCC (" << utostr(I->size()) << " node" << (I->size() == 1 ? "" : "s") + << ") {\n"; + for (const ValueInfo V : *I) { + FunctionSummary *F = nullptr; + if (V.getSummaryList().size()) + F = cast(V.getSummaryList().front().get()); + O << " " << (F == nullptr ? "External" : "") << " " << utostr(V.getGUID()) + << (I.hasLoop() ? " (has loop)" : "") << "\n"; + } + O << "}\n"; + } +} + namespace { struct Attributes { void add(const Twine &Name, const Twine &Value, Index: lib/LTO/LTO.cpp =================================================================== --- lib/LTO/LTO.cpp +++ lib/LTO/LTO.cpp @@ -50,6 +50,10 @@ #define DEBUG_TYPE "lto" +static cl::opt + DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden, + cl::desc("Dump the SCCs in the ThinLTO index's callgraph")); + // The values are (type identifier, summary) pairs. typedef DenseMap< GlobalValue::GUID, @@ -1095,12 +1099,16 @@ if (!ModuleToDefinedGVSummaries.count(Mod.first)) ModuleToDefinedGVSummaries.try_emplace(Mod.first); + propagateFunctionAttrs(ThinLTO.CombinedIndex); StringMap ImportLists( ThinLTO.ModuleMap.size()); StringMap ExportLists( ThinLTO.ModuleMap.size()); StringMap> ResolvedODR; + if (DumpThinCGSCCs) + ThinLTO.CombinedIndex.dumpSCCs(outs()); + if (Conf.OptLevel > 0) ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, ImportLists, ExportLists); Index: lib/LTO/LTOBackend.cpp =================================================================== --- lib/LTO/LTOBackend.cpp +++ lib/LTO/LTOBackend.cpp @@ -420,6 +420,7 @@ renameModuleForThinLTO(Mod, CombinedIndex); + thinLTOInsertFunctionAttrsForModule(Mod, DefinedGlobals); thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals); if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) Index: lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- lib/Transforms/IPO/FunctionImport.cpp +++ lib/Transforms/IPO/FunctionImport.cpp @@ -13,13 +13,14 @@ #include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/Constants.h" @@ -64,6 +65,8 @@ STATISTIC(NumImportedModules, "Number of modules imported from"); STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index"); STATISTIC(NumLiveSymbols, "Number of live symbols in index"); +STATISTIC(NumNoRecurse, + "Number of of functions marked norecurse from summary propagation"); /// Limit on instruction count of imported functions. static cl::opt ImportInstrLimit( @@ -493,6 +496,60 @@ #endif } +/// Propagates NoRecurse attributes throughout +/// the Index (uses same method as FunctionAttrs pass) +bool llvm::propagateFunctionAttrs(ModuleSummaryIndex &Index) { + // TODO: implement addNoAliasAttrs once + // there's more information about the return type in the summary + + auto addNoRecurseAttrs = [](std::vector &SCCNodes) { + if (SCCNodes.size() != 1) + return false; + + ValueInfo V = SCCNodes.front(); + + // Bail if we don't have a FunctionSummary to work with + if (!V.getSummaryList().size()) + return false; + + FunctionSummary *F = getFrontFunctionSummary(V); + + // The dummy root node has guid of 0, so skip it + if (V.getGUID() == 0 || F->fflags().NoRecurse) + return false; + + bool calleesMightRecurse = std::any_of( + F->calls().begin(), F->calls().end(), + [](const FunctionSummary::EdgeTy &E) { + if (E.first.getGUID() == 0 || !E.first.getSummaryList().size()) + return true; // might recurse - we can't reason about external + // functions + FunctionSummary *CFS = getFrontFunctionSummary(E.first); + return !CFS->fflags().NoRecurse; + }); + + if (calleesMightRecurse) + return false; + + if (!F->fflags().NoRecurse) { + F->fflags().NoRecurse = true; + NumNoRecurse++; + return true; + } + return false; + }; + + bool Changed = false; + + // Call propagation functions on each SCC in the Index + for (scc_iterator I = scc_begin(&Index); !I.isAtEnd(); + ++I) { + std::vector Nodes(*I); + Changed |= addNoRecurseAttrs(Nodes); + } + return Changed; +} + void llvm::computeDeadSymbols( ModuleSummaryIndex &Index, const DenseSet &GUIDPreservedSymbols) { @@ -603,6 +660,24 @@ return std::error_code(); } +/// Insert function attributes in the Index back into the \p TheModule. +void llvm::thinLTOInsertFunctionAttrsForModule( + Module &TheModule, const GVSummaryMapTy &DefinedGlobals) { + for (Function &F : TheModule) { + const auto &GV = DefinedGlobals.find(F.getGUID()); + if (GV == DefinedGlobals.end()) + return; + + FunctionSummary *FS = cast(GV->second); + if (FS->fflags().ReadNone) + F.setDoesNotAccessMemory(); + if (FS->fflags().ReadOnly) + F.setOnlyReadsMemory(); + if (FS->fflags().NoRecurse) + F.setDoesNotRecurse(); + } +} + /// Fixup WeakForLinker linkages in \p TheModule based on summary analysis. void llvm::thinLTOResolveWeakForLinkerModule( Module &TheModule, const GVSummaryMapTy &DefinedGlobals) { Index: test/ThinLTO/X86/Inputs/function-attr-prop-a.ll =================================================================== --- /dev/null +++ test/ThinLTO/X86/Inputs/function-attr-prop-a.ll @@ -0,0 +1,6 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @c() norecurse { + ret i32 4 +} Index: test/ThinLTO/X86/functionattr-prop.ll =================================================================== --- /dev/null +++ test/ThinLTO/X86/functionattr-prop.ll @@ -0,0 +1,15 @@ +; RUN: llvm-as %s -o %t1.o +; RUN: llvm-as %p/Inputs/function-attr-prop-a.ll -o %t2.o +; RUN: llvm-lto2 run -o %t3.o %t2.o %t1.o -r %t2.o,c,px -r %t1.o,d,px -r %t1.o,c,l -save-temps +; RUN: llvm-dis < %t3.o.0.4.opt.bc -o - | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i32 @c() +; CHECK: define i32 @d() {{.*}} #0 +define i32 @d() { + call i32 @c() + ret i32 1; +} +; CHECK: #0 = { norecurse Index: test/ThinLTO/X86/module_summary_graph_traits.ll =================================================================== --- /dev/null +++ test/ThinLTO/X86/module_summary_graph_traits.ll @@ -0,0 +1,58 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: llvm-lto2 run -print-summary-global-ids -dump-thin-cg-sccs %t1.bc -o %t.index.bc \ +; RUN: -r %t1.bc,external,px -r %t1.bc,l2,pl -r %t1.bc,l1,pl \ +; RUN: -r %t1.bc,simple,pl -r %t1.bc,root,pl 2>&1 | FileCheck %s + +; CHECK: 5224464028922159466{{.*}} is external +; CHECK: 765152853862302398{{.*}} is l2 +; CHECK: 17000277804057984823{{.*}} is l1 +; CHECK: 15440740835768581517{{.*}} is simple +; CHECK: 5800840261926955363{{.*}} is root + +; CHECK: SCC (2 nodes) { +; CHECK-NEXT: {{^}} 17000277804057984823 (has loop) +; CHECK-NEXT: {{^}} 765152853862302398 (has loop) +; CHECK-NEXT: } + +; CHECK: SCC (1 node) { +; CHECK-NEXT: {{^}} 15440740835768581517{{$}} +; CHECK-NEXT: } + +; CHECK: SCC (1 node) { +; CHECK-NEXT: External 5224464028922159466{{$}} +; CHECK-NEXT: } + +; CHECK: SCC (1 node) { +; CHECK-NEXT: {{^}} 5800840261926955363{{$}} +; CHECK-NEXT: } + +; Dummy call graph root that points at all roots of the callgraph. +; CHECK: SCC (1 node) { +; CHECK-NEXT: {{^}} 0{{$}} +; CHECK-NEXT: } + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @external() + +define void @l2() { + call void @l1() + ret void +} + +define void @l1() { + call void @l2() + ret void +} + +define i32 @simple() { + ret i32 23 +} + +define void @root() { + call void @l1() + call i32 @simple() + call void @external() + ret void +}