Index: include/llvm/Transforms/IPO/InlinerStats.h
===================================================================
--- /dev/null
+++ include/llvm/Transforms/IPO/InlinerStats.h
@@ -0,0 +1,90 @@
+#ifndef LLVM_TRANSFORMS_IPO_INLINERSTATS_H
+#define LLVM_TRANSFORMS_IPO_INLINERSTATS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include <cstdint>
+#include <vector>
+
+/// InlinerStatistics - calculating and dumping statistics on performed inlines.
+/// It calculates summarized stats like:
+/// (1) Number of inlined imported functions,
+/// (2) Number of inlined imported functions to importing module (indirect)
+/// (3) Number of inlined non imported functions to importing module (indirect)
+/// The difference between first and the second is that first stat counts
+/// all performed inlines on imported functions, but the second one only the
+/// functions that have been eventually inlined to a function in the importing
+/// module (by a chain of inlines). Because llvm uses bottom-up inliner, it is
+/// possible to e.g. import function A, B and then inline B to A,
+/// and after this A might be too big to be inlined into some other function
+/// that calls it. It calculates the real values by building graph, where
+/// the nodes are functions, and edges are performed inlines and then by marking
+/// the edges starting from not imported function.
+///
+/// If `EnableListStats` is set to true, then it also dumps statistics
+/// per each inlined function, sorted by the greatest inlines count like
+/// - number of performed inlines
+/// - number of performed real inlines
+class InlinerStatistics {
+private:
+  /// InlineGraphNode represents node in graph of inlined functions.
+  struct InlineGraphNode {
+    // Default constructible and movable.
+    InlineGraphNode() = default;
+    InlineGraphNode(InlineGraphNode &&) = default;
+    InlineGraphNode &operator=(InlineGraphNode &&) = default;
+    InlineGraphNode(const InlineGraphNode &) = delete;
+    InlineGraphNode &operator=(const InlineGraphNode &) = delete;
+
+    // NOTE(review): the template arguments were missing from this copy of the
+    // patch; the element type follows from push_back(&CalleeNode), the inline
+    // capacity of 8 is a guess - confirm against the original revision.
+    /// Nodes of the functions inlined directly into this one.
+    llvm::SmallVector<InlineGraphNode *, 8> InlinedCallees;
+    /// Incremented every direct inline.
+    /// 32 bits wide: a small helper can easily be inlined more than 32767
+    /// times in one module, which would wrap a 16-bit counter.
+    int32_t NumberOfInlines = 0;
+    /// Number of inlines that leads to not imported function.
+    /// Computed based on graph search.
+    int32_t NumberOfRealInlines = 0;
+    /// True when the function carries "thinlto_src_module" metadata.
+    bool Imported = false;
+    /// Marks nodes already expanded by dfs().
+    bool Visited = false;
+  };
+
+  // NOTE(review): key/value types reconstructed from usage (NodesMap[Caller],
+  // Node.first->getName(), Node.second.Imported) - confirm.
+  using NodesMapTy = llvm::DenseMap<const llvm::Function *, InlineGraphNode>;
+  friend InlinerStatistics &getInlinerStatistics(bool EnableListStats);
+
+public:
+  /// Records that \p Callee has been inlined into \p Caller.
+  void addInlinedFunction(llvm::Function *Caller, llvm::Function *Callee);
+  /// Computes the real-inline counts and prints the report to dbgs().
+  void dumpStats();
+
+private:
+  InlinerStatistics(bool EnableListStats);
+  void calculateRealInlines();
+  void dfs(InlineGraphNode *const GraphNode);
+
+  // NOTE(review): element type reconstructed from usage - confirm.
+  using SortedNodesTy = std::vector<NodesMapTy::value_type>;
+  // Clears NodesMap and returns vector of elements sorted by
+  // (-NumberOfInlines, -NumberOfRealInlines, FunctionName).
+  SortedNodesTy getSortedNodes();
+
+private:
+  NodesMapTy NodesMap;
+  /// Non external functions that have some other function inlined inside.
+  std::vector<const llvm::Function *> NonImportedCallers;
+  bool EnableListStats;
+};
+
+/// Returns InlinerStatistics singleton.
+InlinerStatistics &getInlinerStatistics(bool EnableListStats);
+
+#endif // LLVM_TRANSFORMS_IPO_INLINERSTATS_H
Index: lib/Transforms/IPO/CMakeLists.txt
===================================================================
--- lib/Transforms/IPO/CMakeLists.txt
+++ lib/Transforms/IPO/CMakeLists.txt
@@ -17,6 +17,7 @@
   InlineAlways.cpp
   InlineSimple.cpp
   Inliner.cpp
+  InlinerStats.cpp
   Internalize.cpp
   LoopExtractor.cpp
   LowerTypeTests.cpp
Index: lib/Transforms/IPO/Inliner.cpp
===================================================================
--- lib/Transforms/IPO/Inliner.cpp
+++ lib/Transforms/IPO/Inliner.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/IPO/InlinerStats.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
@@ -47,6 +48,14 @@
 // if those would be more profitable and blocked inline steps.
 STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
 
+static cl::opt<bool>
+    EnableInlineGraphStats("enable-import-graph-stats", cl::init(false),
+                           cl::Hidden, cl::desc("Enable inline graph stats"));
+
+static cl::opt<bool> EnableListStats(
+    "enable-list-stats", cl::init(false), cl::Hidden,
+    cl::desc("Enable printing of statistics for each inlined function"));
+
 Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
 
 Inliner::Inliner(char &ID, bool InsertLifetime)
@@ -63,7 +72,6 @@
   CallGraphSCCPass::getAnalysisUsage(AU);
 }
 
-
 typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
 InlinedArrayAllocasTy;
 
@@ -78,6 +86,7 @@
 static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
                                  InlinedArrayAllocasTy &InlinedArrayAllocas,
                                  int InlineHistory, bool InsertLifetime) {
+  // Callee and Caller information will be gone in CS after inlining.
Function *Callee = CS.getCalledFunction();
   Function *Caller = CS.getCaller();
 
@@ -94,6 +103,9 @@
   if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
     return false;
 
+  if (EnableInlineGraphStats)
+    getInlinerStatistics(EnableListStats).addInlinedFunction(Caller, Callee);
+
   AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);
 
   // Look at all of the allocas that we inlined through this call site. If we
@@ -568,6 +580,8 @@
 /// Remove now-dead linkonce functions at the end of
 /// processing to avoid breaking the SCC traversal.
 bool Inliner::doFinalization(CallGraph &CG) {
+  if (EnableInlineGraphStats)
+    getInlinerStatistics(EnableListStats).dumpStats();
   return removeDeadFunctions(CG);
 }
 
Index: lib/Transforms/IPO/InlinerStats.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/IPO/InlinerStats.cpp
@@ -0,0 +1,145 @@
+#include "llvm/Transforms/IPO/InlinerStats.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <iterator>
+
+using namespace llvm;
+
+/// Creates an empty statistics graph. \p EnableListStats selects whether
+/// dumpStats() also prints one line per inlined function.
+InlinerStatistics::InlinerStatistics(bool EnableListStats)
+    : EnableListStats(EnableListStats) {
+  NonImportedCallers.reserve(200);
+}
+
+/// Records one performed inline of \p Callee into \p Caller. A function is
+/// treated as imported when it carries "thinlto_src_module" metadata.
+void InlinerStatistics::addInlinedFunction(Function *Caller, Function *Callee) {
+  assert(Caller && Callee);
+  // Create both entries before taking references: inserting into a DenseMap
+  // may grow the table and invalidate references obtained earlier.
+  (void)NodesMap[Caller];
+  (void)NodesMap[Callee];
+  auto &CallerNode = NodesMap[Caller];
+  auto &CalleeNode = NodesMap[Callee];
+
+  CallerNode.Imported = Caller->getMetadata("thinlto_src_module") != nullptr;
+  CalleeNode.Imported = Callee->getMetadata("thinlto_src_module") != nullptr;
+  CalleeNode.NumberOfInlines++;
+
+  if (!CallerNode.Imported && !CalleeNode.Imported) {
+    // Direct inline from not imported callee to not imported caller. It is
+    // basically small optimization to not put this into graph, so for modules
+    // without imported functions there won't be any graph traversal.
+    CalleeNode.NumberOfRealInlines++;
+    return;
+  }
+
+  // NOTE(review): this stores a pointer to a value held inside NodesMap. A
+  // later insertion that grows the map would move the nodes and leave the
+  // pointer dangling - confirm, and consider giving the nodes stable storage.
+  CallerNode.InlinedCallees.push_back(&CalleeNode);
+  if (!CallerNode.Imported)
+    // Save Caller as a starting node for traversal.
+    NonImportedCallers.push_back(Caller);
+}
+
+/// Prints the collected statistics to dbgs(). The collected data is consumed:
+/// NonImportedCallers and NodesMap are cleared in the process.
+void InlinerStatistics::dumpStats() {
+  calculateRealInlines();
+  NonImportedCallers.clear();
+
+  int32_t InlinedImportedFunctionsCount = 0,
+          InlinedImportedFunctionsToImportingModuleCount = 0,
+          InlinedNotImportedFunctionsToImportingModuleCount = 0;
+
+  const auto SortedNodes = getSortedNodes();
+  dbgs() << "------- Dumping inliner stats -------\n";
+  for (const auto &Node : SortedNodes) {
+    if (Node.second.Imported) {
+      InlinedImportedFunctionsCount += (Node.second.NumberOfInlines > 0) * 1;
+      InlinedImportedFunctionsToImportingModuleCount +=
+          (Node.second.NumberOfRealInlines > 0) * 1;
+    } else {
+      // Accumulate like the two counters above; a plain assignment would only
+      // remember the last visited function instead of counting all of them.
+      InlinedNotImportedFunctionsToImportingModuleCount +=
+          (Node.second.NumberOfRealInlines > 0) * 1;
+    }
+
+    assert(Node.second.NumberOfInlines >= Node.second.NumberOfRealInlines);
+    // Nodes are sorted by decreasing NumberOfInlines, so the first zero means
+    // there are no more inlined functions.
+    if (Node.second.NumberOfInlines == 0)
+      break;
+    if (EnableListStats)
+      dbgs() << "Inlined "
+             << (Node.second.Imported ? "imported " : "not imported ")
+             << "function [" << Node.first->getName() << "]"
+             << ": #inlines = " << Node.second.NumberOfInlines
+             << ", #inlines_to_importing_module = "
+             << Node.second.NumberOfRealInlines << "\n";
+  }
+
+  dbgs() << "Number of inlined imported functions: "
+         << InlinedImportedFunctionsCount
+         << "\nNumber of inlined imported functions to importing module: "
+         << InlinedImportedFunctionsToImportingModuleCount
+         << "\nNumber of inlined not imported functions to importing module: "
+         << InlinedNotImportedFunctionsToImportingModuleCount << "\n";
+}
+
+/// Walks the inline graph from every not imported caller and bumps
+/// NumberOfRealInlines for each inline edge reached.
+void InlinerStatistics::calculateRealInlines() {
+  // Removing duplicated Callers.
+  std::sort(NonImportedCallers.begin(), NonImportedCallers.end());
+
+  NonImportedCallers.erase(
+      std::unique(NonImportedCallers.begin(), NonImportedCallers.end()),
+      NonImportedCallers.end());
+  for (const auto *F : NonImportedCallers)
+    dfs(&NodesMap[F]);
+}
+
+/// Depth-first traversal: counts every inline edge leaving \p GraphNode and
+/// recurses into callees that have not been visited yet.
+void InlinerStatistics::dfs(InlineGraphNode *const GraphNode) {
+  GraphNode->Visited = true;
+  for (auto *const InlinedFunctionNode : GraphNode->InlinedCallees) {
+    InlinedFunctionNode->NumberOfRealInlines++;
+    if (!InlinedFunctionNode->Visited)
+      dfs(InlinedFunctionNode);
+  }
+}
+
+/// Moves all nodes out of NodesMap (leaving it empty) and returns them sorted
+/// by decreasing NumberOfInlines, then decreasing NumberOfRealInlines, then
+/// function name.
+InlinerStatistics::SortedNodesTy InlinerStatistics::getSortedNodes() {
+  SortedNodesTy SortedNodes(std::make_move_iterator(NodesMap.begin()),
+                            std::make_move_iterator(NodesMap.end()));
+  NodesMap.clear();
+
+  std::sort(SortedNodes.begin(), SortedNodes.end(),
+            [](const SortedNodesTy::value_type &Lhs,
+               const SortedNodesTy::value_type &Rhs) {
+              if (Lhs.second.NumberOfInlines != Rhs.second.NumberOfInlines)
+                return Lhs.second.NumberOfInlines > Rhs.second.NumberOfInlines;
+              if (Lhs.second.NumberOfRealInlines !=
+                  Rhs.second.NumberOfRealInlines)
+                return Lhs.second.NumberOfRealInlines >
+                       Rhs.second.NumberOfRealInlines;
+              return Lhs.first->getName() < Rhs.first->getName();
+            });
+  return SortedNodes;
+}
+
+/// Returns the process-wide InlinerStatistics instance. \p EnableListStats is
+/// only used by the first call, which constructs the instance.
+InlinerStatistics &getInlinerStatistics(bool EnableListStats) {
+  static InlinerStatistics Graph(EnableListStats);
+  return Graph;
+}
Index: test/Transforms/Inline/inline_stats.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/inline_stats.ll
@@ -0,0 +1,87 @@
+; RUN: opt -S -inline -enable-import-graph-stats -enable-list-stats < %s 2>&1 | FileCheck %s
+
+; CHECK: ------- Dumping inliner stats -------
+; CHECK: Inlined not imported function [internal2]: #inlines = 6, #inlines_to_importing_module = 2
+; CHECK: Inlined imported function [external2]: #inlines = 4, #inlines_to_importing_module = 1
+; CHECK: Inlined imported function [external1]: #inlines = 3, #inlines_to_importing_module = 2
+; CHECK: Inlined imported function [external5]: #inlines = 1, #inlines_to_importing_module = 1
+; CHECK: Inlined not imported function [internal4]: #inlines = 1, #inlines_to_importing_module = 1
+; CHECK: Inlined imported function [external3]: #inlines = 1, #inlines_to_importing_module = 0
+
+; CHECK: Number of inlined imported functions: 4
+; CHECK: Number of inlined imported functions to importing module: 3
+; CHECK: Number of inlined not imported functions to importing module: 2
+
+define void @internal() {
+  call fastcc void @external1()
+  call fastcc void @internal2()
+  call coldcc void @external_big()
+  ret void
+}
+
+define void @internal2() alwaysinline {
+  ret void
+}
+
+define void @internal3() {
+  call fastcc void @external1()
+  call fastcc void @external5()
+  call fastcc void @internal4()
+  ret void
+}
+
+; Second not imported callee, so that the summary has to count more than one
+; not imported function.
+define void @internal4() alwaysinline {
+  ret void
+}
+
+define void @external1() alwaysinline !thinlto_src_module !0 {
+  call fastcc void @internal2()
+  call fastcc void @external2();
+  ret void
+}
+
+define void @external2() alwaysinline !thinlto_src_module !1 {
+  ret void
+}
+
+define void @external3() alwaysinline !thinlto_src_module !1 {
+  ret void
+}
+
+define void @external4() !thinlto_src_module !1 {
+  call fastcc void @external1()
+  call fastcc void @external2()
+  ret void
+}
+
+define void @external5() !thinlto_src_module !1 {
+  ret void
+}
+
+; Assume big piece of code here. This function won't be inlined, so all the
+; functions inlined into it won't affect real inlines.
+define void @external_big() noinline !thinlto_src_module !1 {
+; CHECK-NOT: call fastcc void @internal2()
+  call fastcc void @internal2()
+  call fastcc void @internal2()
+  call fastcc void @internal2()
+  call fastcc void @internal2()
+
+; CHECK-NOT: call fastcc void @external2()
+  call fastcc void @external2()
+  call fastcc void @external2()
+; CHECK-NOT: call fastcc void @external3()
+  call fastcc void @external3()
+  ret void
+}
+
+; It should not be imported, but it should not break anything.
+define void @external_notcalled() !thinlto_src_module !0 {
+  call void @external_notcalled()
+  ret void
+}
+
+!0 = !{!"file.cc"}
+!1 = !{!"other.cc"}