Index: include/llvm/Transforms/IPO/InlinerStats.h
===================================================================
--- /dev/null
+++ include/llvm/Transforms/IPO/InlinerStats.h
@@ -0,0 +1,76 @@
+#ifndef LLVM_TRANSFORMS_IPO_INLINERSTATS_H
+#define LLVM_TRANSFORMS_IPO_INLINERSTATS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include <cstdint>
+#include <map>
+#include <utility>
+#include <vector>
+
+/// InlinerStatistics - calculates and dumps statistics on performed inlines.
+/// It reports summarized stats like:
+/// (1) Number of inlined imported functions,
+/// (2) Number of real inlined imported functions,
+/// (3) Number of real not external inlined functions.
+/// The difference between the first and the second is that the first stat
+/// counts all performed inlines, and the second one only the functions that
+/// really have been inlined into some not imported function. Because llvm uses
+/// a bottom-up inliner, it is possible to e.g. import functions A and B, then
+/// inline B into A, and after this A might be too big to be inlined into some
+/// other function that calls it. It calculates the real values by building a
+/// graph, where the nodes are functions and the edges are performed inlines.
+/// Then, starting from non external functions that have some inlined calls
+/// inside, it walks to every inlined function and increments its counter.
+///
+/// If `EnableListStats` is set to true, then it also dumps statistics for
+/// each inlined function, sorted by the greatest inlines count, like
+/// - number of performed inlines
+/// - number of performed real inlines
+class InlinerStatistics {
+private:
+  /// Node of the inline graph; describes one function.
+  struct InlineGraphNode {
+    /// Nodes of the functions inlined directly into this one. The pointers
+    /// refer to values stored in NodesMap.
+    llvm::SmallVector<InlineGraphNode *, 8> InlinedFunctions;
+    int32_t NumberOfInlines = 0;     // Incremented every direct inline.
+    int32_t NumberOfRealInlines = 0; // Computed based on graph.
+    bool Imported = false;
+    bool Visited = false;            // Set once dfs() has walked the node.
+  };
+
+  // A node-based map is required here: InlinedFunctions keeps pointers to
+  // the mapped nodes, and DenseMap relocates its values whenever it grows.
+  using NodesMapTy = std::map<const llvm::Function *, InlineGraphNode>;
+  friend InlinerStatistics &getInlinerStatistics(bool EnableListStats);
+
+public:
+  /// Records that \p Inlined has been inlined into \p Fun.
+  void addInlinedFunction(llvm::Function *Fun, llvm::Function *Inlined);
+  /// Computes the real inline counts, prints the statistics and resets them.
+  void dumpStats();
+
+private:
+  explicit InlinerStatistics(bool EnableListStats);
+  void calculateRealInlines();
+  void dfs(InlineGraphNode *const GraphNode);
+
+  using SortedNodesTy =
+      std::vector<std::pair<const llvm::Function *, InlineGraphNode>>;
+  // Clears NodesMap and returns vector of elements sorted by
+  // (-NumberOfInlines, -NumberOfRealInlines, FunctionName).
+  SortedNodesTy getSortedNodes();
+
+private:
+  NodesMapTy NodesMap;
+  // Non external functions that have some other function inlined inside.
+  std::vector<const llvm::Function *> NonExternalFunctions;
+  bool EnableListStats;
+};
+
+/// Returns InlinerStatistics singleton.
+InlinerStatistics &getInlinerStatistics(bool EnableListStats);
+
+#endif // LLVM_TRANSFORMS_IPO_INLINERSTATS_H
Index: lib/Transforms/IPO/CMakeLists.txt
===================================================================
--- lib/Transforms/IPO/CMakeLists.txt
+++ lib/Transforms/IPO/CMakeLists.txt
@@ -17,6 +17,7 @@
   InlineAlways.cpp
   InlineSimple.cpp
   Inliner.cpp
+  InlinerStats.cpp
   Internalize.cpp
   LoopExtractor.cpp
   LowerTypeTests.cpp
Index: lib/Transforms/IPO/Inliner.cpp
===================================================================
--- lib/Transforms/IPO/Inliner.cpp
+++ lib/Transforms/IPO/Inliner.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/IPO/InlinerStats.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
@@ -47,6 +48,14 @@
 // if those would be more profitable and blocked inline steps.
 STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
 
+static cl::opt<bool>
+    EnableInlineGraphStats("enable-import-graph-stats", cl::init(false),
+                           cl::Hidden, cl::desc("Enable inline graph stats"));
+
+static cl::opt<bool> EnableListStats(
+    "enable-list-stats", cl::init(false), cl::Hidden,
+    cl::desc("Enable printing of statistics for each inlined function"));
+
 Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
 
 Inliner::Inliner(char &ID, bool InsertLifetime)
@@ -63,7 +72,6 @@
   CallGraphSCCPass::getAnalysisUsage(AU);
 }
 
-
 typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
 InlinedArrayAllocasTy;
 
@@ -78,6 +86,7 @@
 static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
                                  InlinedArrayAllocasTy &InlinedArrayAllocas,
                                  int InlineHistory, bool InsertLifetime) {
+  // Callee and Caller information will be gone in CS after inlining.
   Function *Callee = CS.getCalledFunction();
   Function *Caller = CS.getCaller();
 
@@ -94,6 +103,9 @@
   if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
     return false;
 
+  if (EnableInlineGraphStats)
+    getInlinerStatistics(EnableListStats).addInlinedFunction(Caller, Callee);
+
   AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);
 
   // Look at all of the allocas that we inlined through this call site. If we
@@ -568,6 +580,8 @@
 /// Remove now-dead linkonce functions at the end of
 /// processing to avoid breaking the SCC traversal.
 bool Inliner::doFinalization(CallGraph &CG) {
+  if (EnableInlineGraphStats)
+    getInlinerStatistics(EnableListStats).dumpStats();
   return removeDeadFunctions(CG);
 }
 
Index: lib/Transforms/IPO/InlinerStats.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/IPO/InlinerStats.cpp
@@ -0,0 +1,114 @@
+#include "llvm/Transforms/IPO/InlinerStats.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+
+using namespace llvm;
+
+InlinerStatistics::InlinerStatistics(bool EnableListStats)
+    : EnableListStats(EnableListStats) {
+  NonExternalFunctions.reserve(200);
+}
+
+void InlinerStatistics::addInlinedFunction(Function *Fun, Function *Inlined) {
+  assert(Fun && Inlined);
+  auto &FunNode = NodesMap[Fun];
+  FunNode.Imported = Fun->getMetadata("thinlto_src_module") != nullptr;
+  // Register a not imported caller as a traversal root only once, on its
+  // first inline; duplicated roots would be walked (and counted) repeatedly.
+  if (!FunNode.Imported && FunNode.InlinedFunctions.empty())
+    NonExternalFunctions.push_back(Fun);
+
+  auto &InlinedNode = NodesMap[Inlined];
+  InlinedNode.Imported = Inlined->getMetadata("thinlto_src_module") != nullptr;
+  InlinedNode.NumberOfInlines++;
+  FunNode.InlinedFunctions.push_back(&InlinedNode);
+}
+
+void InlinerStatistics::dumpStats() {
+  calculateRealInlines();
+  NonExternalFunctions.clear();
+
+  int32_t NumberOfUniqueInlinedImportedFunctions = 0,
+          NumberOfRealUniqueInlinedImportedFunctions = 0,
+          NumberOfRealUniqueInlinedNotExternalFunctions = 0;
+
+  // getSortedNodes() also clears NodesMap, resetting the collected state.
+  const auto SortedNodes = getSortedNodes();
+  for (const auto &Node : SortedNodes) {
+    // Nodes are sorted by decreasing NumberOfInlines, so the first zero means
+    // there are no more inlined functions.
+    if (Node.second.NumberOfInlines == 0)
+      break;
+    assert(Node.second.NumberOfInlines >= Node.second.NumberOfRealInlines);
+
+    const bool HasRealInlines = Node.second.NumberOfRealInlines > 0;
+    if (Node.second.Imported) {
+      ++NumberOfUniqueInlinedImportedFunctions;
+      if (HasRealInlines)
+        ++NumberOfRealUniqueInlinedImportedFunctions;
+    } else if (HasRealInlines) {
+      // Accumulate; assigning here would only remember the last function.
+      ++NumberOfRealUniqueInlinedNotExternalFunctions;
+    }
+
+    if (EnableListStats)
+      dbgs() << "Inlined "
+             << (Node.second.Imported ? "imported " : "not external ")
+             << "function [" << Node.first->getName() << "]"
+             << ": #inlines = " << Node.second.NumberOfInlines
+             << ", #real_inlines = " << Node.second.NumberOfRealInlines << "\n";
+  }
+
+  dbgs() << "Number of inlined imported functions: "
+         << NumberOfUniqueInlinedImportedFunctions
+         << "\nNumber of real inlined imported functions: "
+         << NumberOfRealUniqueInlinedImportedFunctions
+         << "\nNumber of real not external inlined functions: "
+         << NumberOfRealUniqueInlinedNotExternalFunctions << "\n";
+}
+
+void InlinerStatistics::calculateRealInlines() {
+  for (const auto *F : NonExternalFunctions) {
+    InlineGraphNode &Root = NodesMap[F];
+    if (!Root.Visited)
+      dfs(&Root);
+  }
+}
+
+void InlinerStatistics::dfs(InlineGraphNode *const GraphNode) {
+  // Walk every node once: this keeps the traversal finite on cyclic graphs
+  // and counts each performed inline (edge) at most once.
+  GraphNode->Visited = true;
+  for (auto *const InlinedFunctionNode : GraphNode->InlinedFunctions) {
+    InlinedFunctionNode->NumberOfRealInlines++;
+    if (!InlinedFunctionNode->Visited)
+      dfs(InlinedFunctionNode);
+  }
+}
+
+InlinerStatistics::SortedNodesTy InlinerStatistics::getSortedNodes() {
+  SortedNodesTy SortedNodes(std::make_move_iterator(NodesMap.begin()),
+                            std::make_move_iterator(NodesMap.end()));
+  NodesMap.clear();
+
+  std::sort(SortedNodes.begin(), SortedNodes.end(),
+            [](const SortedNodesTy::value_type &Lhs,
+               const SortedNodesTy::value_type &Rhs) {
+              if (Lhs.second.NumberOfInlines != Rhs.second.NumberOfInlines)
+                return Lhs.second.NumberOfInlines > Rhs.second.NumberOfInlines;
+              if (Lhs.second.NumberOfRealInlines !=
+                  Rhs.second.NumberOfRealInlines)
+                return Lhs.second.NumberOfRealInlines >
+                       Rhs.second.NumberOfRealInlines;
+              return Lhs.first->getName() < Rhs.first->getName();
+            });
+  return SortedNodes;
+}
+
+InlinerStatistics &getInlinerStatistics(bool EnableListStats) {
+  static InlinerStatistics Graph(EnableListStats);
+  return Graph;
+}
Index: test/Transforms/Inline/inline_stats.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/inline_stats.ll
@@ -0,0 +1,69 @@
+; RUN: opt -S -inline -enable-import-graph-stats -enable-list-stats < %s 2>&1 | FileCheck %s
+
+; CHECK: Inlined not external function [internal2]: #inlines = 5, #real_inlines = 1
+; CHECK: Inlined imported function [external2]: #inlines = 3, #real_inlines = 1
+; CHECK: Inlined imported function [external1]: #inlines = 1, #real_inlines = 1
+; CHECK: Inlined not external function [internal3]: #inlines = 1, #real_inlines = 1
+; CHECK: Inlined imported function [external3]: #inlines = 1, #real_inlines = 0
+
+; CHECK: Number of inlined imported functions: 3
+; CHECK: Number of real inlined imported functions: 2
+; CHECK: Number of real not external inlined functions: 2
+
+define void @internal() {
+  call fastcc void @external1()
+  call fastcc void @internal3()
+  call coldcc void @external_big()
+  ret void
+}
+
+define void @internal2() alwaysinline {
+  ret void
+}
+
+; Second direct inline into @internal: verifies that a caller with several
+; inlines is walked only once and that not external functions are summed up.
+define void @internal3() alwaysinline {
+  ret void
+}
+
+define void @external1() alwaysinline !thinlto_src_module !0 {
+  call fastcc void @internal2()
+  call fastcc void @external2();
+  ret void
+}
+
+define void @external2() alwaysinline !thinlto_src_module !1 {
+  ret void
+}
+
+
+define void @external3() alwaysinline !thinlto_src_module !1 {
+  ret void
+}
+
+; Assume big piece of code here. This function won't be inlined, so all the
+; inlined function it will have won't affect real inlines.
+define void @external_big() noinline !thinlto_src_module !1 {
+; CHECK-NOT: call fastcc void @internal2()
+  call fastcc void @internal2()
+  call fastcc void @internal2()
+  call fastcc void @internal2()
+  call fastcc void @internal2()
+
+; CHECK-NOT: call fastcc void @external2()
+  call fastcc void @external2()
+  call fastcc void @external2()
+; CHECK-NOT: call fastcc void @external3()
+  call fastcc void @external3()
+  ret void
+}
+
+; It should not be imported, but it should not break anything.
+define void @external_notcalled() !thinlto_src_module !0 {
+  call void @external_notcalled()
+  ret void
+}
+
+!0 = !{!"file.cc"}
+!1 = !{!"other.cc"}