Index: include/llvm/Transforms/IPO/InlinerPass.h =================================================================== --- include/llvm/Transforms/IPO/InlinerPass.h +++ include/llvm/Transforms/IPO/InlinerPass.h @@ -18,6 +18,7 @@ #define LLVM_TRANSFORMS_IPO_INLINERPASS_H #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" namespace llvm { class AssumptionCacheTracker; @@ -87,6 +88,7 @@ protected: AssumptionCacheTracker *ACT; ProfileSummaryInfo *PSI; + ImportedFunctionsInliningStatistics ImportedFunctionsStats; }; } // End llvm namespace Index: include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h @@ -0,0 +1,99 @@ +//===-- ImportedFunctionsInliningStats.h ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Generating inliner statistics for imported functions, mostly useful for +// ThinLTO. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H +#define LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include + +namespace llvm { +class Module; +class Function; +/// \brief Calculate and dump ThinLTO specific inliner stats. +/// The main statistics are: +/// (1) Number of inlined imported functions, +/// (2) Number of imported functions inlined into importing module (indirect), +/// (3) Number of non imported functions inlined into importing module +/// (indirect). +/// The difference between first and the second is that first stat counts +/// all performed inlines on imported functions, but the second one only the +/// functions that have been eventually inlined to a function in the importing +/// module (by a chain of inlines). Because llvm uses bottom-up inliner, it is +/// possible to e.g. import function `A`, `B` and then inline `B` to `A`, +/// and after this `A` might be too big to be inlined into some other function +/// that calls it. It calculates this statistic by building graph, where +/// the nodes are functions, and edges are performed inlines and then by marking +/// the edges starting from not imported function. +/// +/// If `Verbose` is set to true, then it also dumps statistics +/// per each inlined function, sorted by the greatest inlines count like +/// - number of performed inlines +/// - number of performed inlines to importing module +class ImportedFunctionsInliningStatistics { +private: + /// InlineGraphNode represents node in graph of inlined functions. + struct InlineGraphNode { + // Default-constructible and movable. + InlineGraphNode() = default; + InlineGraphNode(InlineGraphNode &&) = default; + InlineGraphNode &operator=(InlineGraphNode &&) = default; + InlineGraphNode(const InlineGraphNode &) = delete; + InlineGraphNode &operator=(const InlineGraphNode &) = delete; + + llvm::SmallVector InlinedCallees; + /// Incremented every direct inline. + int32_t NumberOfInlines = 0; + /// Number of inlines into non imported function (possibly indirect via + /// intermediate inlines). Computed based on graph search. + int32_t NumberOfRealInlines = 0; + bool Imported = false; + bool Visited = false; + }; + + // Store unique_ptrs because + using NodesMapTy = llvm::DenseMap>; + +public: + ImportedFunctionsInliningStatistics() = default; + ImportedFunctionsInliningStatistics( + const ImportedFunctionsInliningStatistics &) = delete; + + /// Record inline of @param Callee to @param Caller for statistis. + void recordInline(const Function &Caller, const Function &Callee); + /// Dump stats computed with InlinerStatistics class. + /// If @param Verbose is true then separate statistics for every inlined + /// function will be printed. + void dump(bool Verbose, const Module &M); + +private: + void calculateRealInlines(); + void dfs(InlineGraphNode &GraphNode); + + using SortedNodesTy = std::vector; + /// Clears NodesMap and returns vector of elements sorted by + /// (-NumberOfInlines, -NumberOfRealInlines, FunctionName). + SortedNodesTy getSortedNodes(); + +private: + NodesMapTy NodesMap; + /// Non external functions that have some other function inlined inside. + std::vector NonImportedCallers; + /// Stores functions name. It is required because function could be deleted + /// before dumping stats. + llvm::DenseMap NameMap; +}; + +} // llvm + +#endif // LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H Index: lib/Transforms/IPO/Inliner.cpp =================================================================== --- lib/Transforms/IPO/Inliner.cpp +++ lib/Transforms/IPO/Inliner.cpp @@ -47,6 +47,24 @@ // if those would be more profitable and blocked inline steps. STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); +namespace { +enum class InlinerFunctionImportStatsOpts { + No = 0, + Basic = 1, + Verbose = 2, +}; + +cl::opt InlinerFunctionImportStats( + "inliner-function-import-stats", + cl::init(InlinerFunctionImportStatsOpts::No), + cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic", + "basic statistics"), + clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose", + "printing of statistics for each inlined function"), + clEnumValEnd), + cl::Hidden, cl::desc("Enable inliner stats for imported functions")); +} // namespace + Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {} Inliner::Inliner(char &ID, bool InsertLifetime) @@ -63,7 +81,6 @@ CallGraphSCCPass::getAnalysisUsage(AU); } - typedef DenseMap > InlinedArrayAllocasTy; @@ -75,9 +92,13 @@ /// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. -static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, - InlinedArrayAllocasTy &InlinedArrayAllocas, - int InlineHistory, bool InsertLifetime) { +static bool InlineCallIfPossible( + Pass &P, CallSite CS, InlineFunctionInfo &IFI, + InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, + bool InsertLifetime, + ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { + // Storing because getCalledFunction() and getCaller() are not accessible + // after inlining, due to call instruction being eliminated. Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); @@ -94,6 +115,9 @@ if (!InlineFunction(CS, IFI, &AAR, InsertLifetime)) return false; + if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) + ImportedFunctionsStats.recordInline(*Caller, *Callee); + AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); // Look at all of the allocas that we inlined through this call site. If we @@ -496,7 +520,8 @@ // Attempt to inline the function. if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas, - InlineHistoryID, InsertLifetime)) { + InlineHistoryID, InsertLifetime, + ImportedFunctionsStats)) { emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, Twine(Callee->getName() + " will not be inlined into " + @@ -568,6 +593,10 @@ /// Remove now-dead linkonce functions at the end of /// processing to avoid breaking the SCC traversal. bool Inliner::doFinalization(CallGraph &CG) { + if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) + ImportedFunctionsStats.dump(InlinerFunctionImportStats == + InlinerFunctionImportStatsOpts::Verbose, + CG.getModule()); return removeDeadFunctions(CG); } Index: lib/Transforms/Utils/CMakeLists.txt =================================================================== --- lib/Transforms/Utils/CMakeLists.txt +++ lib/Transforms/Utils/CMakeLists.txt @@ -16,6 +16,7 @@ FunctionImportUtils.cpp GlobalStatus.cpp InlineFunction.cpp + ImportedFunctionsInliningStatistics.cpp InstructionNamer.cpp IntegerDivision.cpp LCSSA.cpp Index: lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp =================================================================== --- /dev/null +++ lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp @@ -0,0 +1,198 @@ +//===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Generating inliner statistics for imported functions, mostly useful for +// ThinLTO. +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +using namespace llvm; + +void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller, + const Function &Callee) { + NameMap.insert({&Caller, Caller.getName()}); + auto &CallerNode = NodesMap[&Caller]; + if (!CallerNode) + CallerNode = llvm::make_unique(); + CallerNode->Imported = Caller.getMetadata("thinlto_src_module") != nullptr; + + NameMap.insert({&Callee, Callee.getName()}); + auto &CalleeNode = NodesMap[&Callee]; + if (!CalleeNode) + CalleeNode = llvm::make_unique(); + CalleeNode->Imported = Callee.getMetadata("thinlto_src_module") != nullptr; + CalleeNode->NumberOfInlines++; + + if (!CallerNode->Imported && !CalleeNode->Imported) { + // Direct inline from not imported callee to not imported caller, so we + // don't have to add this to graph. It might be very helpful if you wanna + // get the inliner statistics in compile step where there are no imported + // functions. In this case the graph would be empty. + CalleeNode->NumberOfRealInlines++; + return; + } + + CallerNode->InlinedCallees.push_back(CalleeNode.get()); + if (!CallerNode->Imported) + // Save Caller as a starting node for traversal. + NonImportedCallers.push_back(&Caller); +} + +static std::pair getFunctionsCount(const Module &M) { + int32_t AllFunctions = 0; + int32_t ImportedFunctions = 0; + + for (const auto &F : M.functions()) { + AllFunctions++; + ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr); + } + + return {AllFunctions, ImportedFunctions}; +} + +std::string getStatString(const char *Msg, int32_t Fraction, int32_t All, + const char *PercentageOfMsg, bool LineEnd = true) { + double Result = 0; + if (All != 0) + Result = 100 * static_cast(Fraction) / All; + + std::stringstream Str; + Str << std::setprecision(4) << Msg << ": " << Fraction << " [" << Result + << "% of " << PercentageOfMsg << "]"; + if (LineEnd) + Str << "\n"; + return Str.str(); +} + +void ImportedFunctionsInliningStatistics::dump(const bool Verbose, + const Module &M) { + calculateRealInlines(); + NonImportedCallers.clear(); + + int32_t InlinedImportedFunctionsCount = 0; + int32_t InlinedNotImportedFunctionsCount = 0; + + int32_t InlinedImportedFunctionsToImportingModuleCount = 0; + int32_t InlinedNotImportedFunctionsToImportingModuleCount = 0; + + int32_t AllFunctions, ImportedFunctions; + std::tie(AllFunctions, ImportedFunctions) = getFunctionsCount(M); + + const auto SortedNodes = getSortedNodes(); + std::string Out; + Out.reserve(5000); + raw_string_ostream Ostream(Out); + + Ostream << "------- Dumping inliner stats for [" << M.getName() + << "] -------\n"; + + if (Verbose) + Ostream << "-- List of inlined functions:\n"; + + for (const auto &Node : SortedNodes) { + assert(Node.second->NumberOfInlines >= Node.second->NumberOfRealInlines); + if (Node.second->NumberOfInlines == 0) + continue; + + if (Node.second->Imported) { + InlinedImportedFunctionsCount++; + InlinedImportedFunctionsToImportingModuleCount += + int(Node.second->NumberOfRealInlines > 0); + } else { + InlinedNotImportedFunctionsCount++; + InlinedNotImportedFunctionsToImportingModuleCount += + int(Node.second->NumberOfRealInlines > 0); + } + + if (Verbose) + Ostream << "Inlined " + << (Node.second->Imported ? "imported " : "not imported ") + << "function [" << NameMap[Node.first] << "]" + << ": #inlines = " << Node.second->NumberOfInlines + << ", #inlines_to_importing_module = " + << Node.second->NumberOfRealInlines << "\n"; + } + + auto InlinedFunctionsCount = + InlinedImportedFunctionsCount + InlinedNotImportedFunctionsCount; + auto NotImportedFuncCount = AllFunctions - ImportedFunctions; + auto ImportedNotInlinedIntoModule = + ImportedFunctions - InlinedImportedFunctionsToImportingModuleCount; + + Ostream << "-- Summary:\n" + << "All functions: " << AllFunctions + << ", imported functions: " << ImportedFunctions << "\n" + << getStatString("inlined functions", InlinedFunctionsCount, + AllFunctions, "all functions") + << getStatString("imported functions inlined anywhere", + InlinedImportedFunctionsCount, ImportedFunctions, + "imported functions") + << getStatString("imported functions inlined into importing module", + InlinedImportedFunctionsToImportingModuleCount, + ImportedFunctions, "imported functions", + /*LineEnd=*/false) + << getStatString(", remaining", ImportedNotInlinedIntoModule, + ImportedFunctions, "imported functions") + << getStatString("non-imported functions inlined anywhere", + InlinedNotImportedFunctionsCount, + NotImportedFuncCount, "non-imported functions") + << getStatString( + "non-imported functions inlined into importing module", + InlinedNotImportedFunctionsToImportingModuleCount, + NotImportedFuncCount, "non-imported functions"); + Ostream.flush(); + dbgs() << Out; +} + +void ImportedFunctionsInliningStatistics::calculateRealInlines() { + // Removing duplicated Callers. + std::sort(NonImportedCallers.begin(), NonImportedCallers.end()); + NonImportedCallers.erase( + std::unique(NonImportedCallers.begin(), NonImportedCallers.end()), + NonImportedCallers.end()); + + for (const auto *F : NonImportedCallers) + dfs(*NodesMap[F]); +} + +void ImportedFunctionsInliningStatistics::dfs(InlineGraphNode& GraphNode) { + GraphNode.Visited = true; + for (auto *const InlinedFunctionNode : GraphNode.InlinedCallees) { + InlinedFunctionNode->NumberOfRealInlines++; + if (!InlinedFunctionNode->Visited) + dfs(*InlinedFunctionNode); + } +} + +ImportedFunctionsInliningStatistics::SortedNodesTy +ImportedFunctionsInliningStatistics::getSortedNodes() { + SortedNodesTy SortedNodes(std::make_move_iterator(NodesMap.begin()), + std::make_move_iterator(NodesMap.end())); + NodesMap.clear(); + + std::sort(SortedNodes.begin(), SortedNodes.end(), + [&](const SortedNodesTy::value_type &Lhs, + const SortedNodesTy::value_type &Rhs) { + if (Lhs.second->NumberOfInlines != Rhs.second->NumberOfInlines) + return Lhs.second->NumberOfInlines > Rhs.second->NumberOfInlines; + if (Lhs.second->NumberOfRealInlines != + Rhs.second->NumberOfRealInlines) + return Lhs.second->NumberOfRealInlines > + Rhs.second->NumberOfRealInlines; + return NameMap[Lhs.first] < NameMap[Rhs.first]; + }); + return SortedNodes; +} Index: test/Transforms/Inline/inline_stats.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/inline_stats.ll @@ -0,0 +1,87 @@ +; RUN: opt -S -inline -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK +; RUN: opt -S -inline -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK + +; CHECK: ------- Dumping inliner stats for [] ------- +; CHECK-BASIC-NOT: -- List of inlined functions: +; CHECK-BASIC-NOT: -- Inlined not imported function +; CHECK-VERBOSE: -- List of inlined functions: +; CHECK-VERBOSE: Inlined not imported function [internal2]: #inlines = 6, #inlines_to_importing_module = 2 +; CHECK-VERBOSE: Inlined imported function [external2]: #inlines = 4, #inlines_to_importing_module = 1 +; CHECK-VERBOSE: Inlined imported function [external1]: #inlines = 3, #inlines_to_importing_module = 2 +; CHECK-VERBOSE: Inlined imported function [external5]: #inlines = 1, #inlines_to_importing_module = 1 +; CHECK-VERBOSE: Inlined imported function [external3]: #inlines = 1, #inlines_to_importing_module = 0 + +; CHECK: -- Summary: +; CHECK: All functions: 10, imported functions: 7 +; CHECK: inlined functions: 5 [50% of all functions] +; CHECK: imported functions inlined anywhere: 4 [57.14% of imported functions] +; CHECK: imported functions inlined into importing module: 3 [42.86% of imported functions], remaining: 4 [57.14% of imported functions] +; CHECK: non-imported functions inlined anywhere: 1 [33.33% of non-imported functions] +; CHECK: non-imported functions inlined into importing module: 1 [33.33% of non-imported functions] + +define void @internal() { + call fastcc void @external1() + call fastcc void @internal2() + call coldcc void @external_big() + ret void +} + +define void @internal2() alwaysinline { + ret void +} + +define void @internal3() { + call fastcc void @external1() + call fastcc void @external5() + ret void +} + +define void @external1() alwaysinline !thinlto_src_module !0 { + call fastcc void @internal2() + call fastcc void @external2(); + ret void +} + +define void @external2() alwaysinline !thinlto_src_module !1 { + ret void +} + +define void @external3() alwaysinline !thinlto_src_module !1 { + ret void +} + +define void @external4() !thinlto_src_module !1 { + call fastcc void @external1() + call fastcc void @external2() + ret void +} + +define void @external5() !thinlto_src_module !1 { + ret void +} + +; Assume big piece of code here. This function won't be inlined, so all the +; inlined function it will have won't affect real inlines. +define void @external_big() noinline !thinlto_src_module !1 { +; CHECK-NOT: call fastcc void @internal2() + call fastcc void @internal2() + call fastcc void @internal2() + call fastcc void @internal2() + call fastcc void @internal2() + +; CHECK-NOT: call fastcc void @external2() + call fastcc void @external2() + call fastcc void @external2() +; CHECK-NOT: call fastcc void @external3() + call fastcc void @external3() + ret void +} + +; It should not be imported, but it should not break anything. +define void @external_notcalled() !thinlto_src_module !0 { + call void @external_notcalled() + ret void +} + +!0 = !{!"file.cc"} +!1 = !{!"other.cc"}