Index: include/llvm/Transforms/IPO/InlinerPass.h =================================================================== --- include/llvm/Transforms/IPO/InlinerPass.h +++ include/llvm/Transforms/IPO/InlinerPass.h @@ -20,6 +20,7 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" namespace llvm { class AssumptionCacheTracker; @@ -78,6 +79,7 @@ protected: AssumptionCacheTracker *ACT; ProfileSummaryInfo *PSI; + ImportedFunctionsInliningStatistics ImportedFunctionsStats; }; } // End llvm namespace Index: include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h @@ -0,0 +1,105 @@ +//===-- ImportedFunctionsInliningStats.h ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Generating inliner statistics for imported functions, mostly useful for +// ThinLTO. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H +#define LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include +#include + +namespace llvm { +class Module; +class Function; +/// \brief Calculate and dump ThinLTO specific inliner stats. +/// The main statistics are: +/// (1) Number of inlined imported functions, +/// (2) Number of imported functions inlined into importing module (indirect), +/// (3) Number of non imported functions inlined into importing module +/// (indirect). +/// The difference between first and the second is that first stat counts +/// all performed inlines on imported functions, but the second one only the +/// functions that have been eventually inlined to a function in the importing +/// module (by a chain of inlines). Because llvm uses bottom-up inliner, it is +/// possible to e.g. import function `A`, `B` and then inline `B` to `A`, +/// and after this `A` might be too big to be inlined into some other function +/// that calls it. It calculates this statistic by building graph, where +/// the nodes are functions, and edges are performed inlines and then by marking +/// the edges starting from not imported function. +/// +/// If `Verbose` is set to true, then it also dumps statistics +/// per each inlined function, sorted by the greatest inlines count like +/// - number of performed inlines +/// - number of performed inlines to importing module +class ImportedFunctionsInliningStatistics { +private: + /// InlineGraphNode represents node in graph of inlined functions. + struct InlineGraphNode { + // Default-constructible and movable. + InlineGraphNode() = default; + InlineGraphNode(InlineGraphNode &&) = default; + InlineGraphNode &operator=(InlineGraphNode &&) = default; + InlineGraphNode(const InlineGraphNode &) = delete; + InlineGraphNode &operator=(const InlineGraphNode &) = delete; + + llvm::SmallVector InlinedCallees; + /// Incremented every direct inline. + int32_t NumberOfInlines = 0; + /// Number of inlines into non imported function (possibly indirect via + /// intermediate inlines). Computed based on graph search. + int32_t NumberOfRealInlines = 0; + bool Imported = false; + bool Visited = false; + /// Store name because the function pointer might be dead. + std::string FunctionName; + }; + +public: + ImportedFunctionsInliningStatistics() = default; + ImportedFunctionsInliningStatistics( + const ImportedFunctionsInliningStatistics &) = delete; + + /// Record inline of @param Callee to @param Caller for statistis. + void recordInline(const Function &Caller, const Function &Callee); + /// Dump stats computed with InlinerStatistics class. + /// If @param Verbose is true then separate statistics for every inlined + /// function will be printed. + void dump(bool Verbose, const Module &M); + +private: + /// Creates new Node in NodeMap and sets attributes, or returns existed one. + InlineGraphNode &createInlineGraphNode(const Function &); + void calculateRealInlines(); + void dfs(InlineGraphNode &GraphNode); + + using NodesMapTy = + llvm::DenseMap>; + using SortedNodesTy = std::vector; + /// Clears NodesMap and returns vector of elements sorted by + /// (-NumberOfInlines, -NumberOfRealInlines, FunctionName). + SortedNodesTy getSortedNodes(); + +private: + /// This map manage life of all InlineGraphNodes. Unique pointer to + /// InlineGraphNode used since the node pointers are also saved in the + /// InlinedCallees vector. Ff it would store InlineGraphNode instead then the + /// address of the node would not be invariant. + NodesMapTy NodesMap; + /// Non external functions that have some other function inlined inside. + /// Should not dereference pointers because Functions might be deleted. + std::vector NonImportedCallers; +}; + +} // llvm + +#endif // LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H Index: lib/Transforms/IPO/Inliner.cpp =================================================================== --- lib/Transforms/IPO/Inliner.cpp +++ lib/Transforms/IPO/Inliner.cpp @@ -47,6 +47,24 @@ // if those would be more profitable and blocked inline steps. STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); +namespace { +enum class InlinerFunctionImportStatsOpts { + No = 0, + Basic = 1, + Verbose = 2, +}; + +cl::opt InlinerFunctionImportStats( + "inliner-function-import-stats", + cl::init(InlinerFunctionImportStatsOpts::No), + cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic", + "basic statistics"), + clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose", + "printing of statistics for each inlined function"), + clEnumValEnd), + cl::Hidden, cl::desc("Enable inliner stats for imported functions")); +} // namespace + Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {} Inliner::Inliner(char &ID, bool InsertLifetime) @@ -75,11 +93,11 @@ /// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. -static bool -InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, - InlinedArrayAllocasTy &InlinedArrayAllocas, - int InlineHistory, bool InsertLifetime, - std::function &AARGetter) { +static bool InlineCallIfPossible( + CallSite CS, InlineFunctionInfo &IFI, + InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, + bool InsertLifetime, std::function &AARGetter, + ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); @@ -90,6 +108,9 @@ if (!InlineFunction(CS, IFI, &AAR, InsertLifetime)) return false; + if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) + ImportedFunctionsStats.recordInline(*Caller, *Callee); + AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); // Look at all of the allocas that we inlined through this call site. If we @@ -384,7 +405,8 @@ ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI, bool InsertLifetime, std::function GetInlineCost, - std::function AARGetter) { + std::function AARGetter, + ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { SmallPtrSet SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); for (CallGraphNode *Node : SCC) { @@ -502,7 +524,8 @@ // Attempt to inline the function. if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, - InlineHistoryID, InsertLifetime, AARGetter)) { + InlineHistoryID, InsertLifetime, AARGetter, + ImportedFunctionsStats)) { emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, Twine(Callee->getName() + " will not be inlined into " + @@ -591,12 +614,16 @@ }; return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime, [this](CallSite CS) { return getInlineCost(CS); }, - AARGetter); + AARGetter, ImportedFunctionsStats); } /// Remove now-dead linkonce functions at the end of /// processing to avoid breaking the SCC traversal. bool Inliner::doFinalization(CallGraph &CG) { + if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) + ImportedFunctionsStats.dump(InlinerFunctionImportStats == + InlinerFunctionImportStatsOpts::Verbose, + CG.getModule()); return removeDeadFunctions(CG); } Index: lib/Transforms/Utils/CMakeLists.txt =================================================================== --- lib/Transforms/Utils/CMakeLists.txt +++ lib/Transforms/Utils/CMakeLists.txt @@ -16,6 +16,7 @@ FunctionImportUtils.cpp GlobalStatus.cpp InlineFunction.cpp + ImportedFunctionsInliningStatistics.cpp InstructionNamer.cpp IntegerDivision.cpp LCSSA.cpp Index: lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp =================================================================== --- /dev/null +++ lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp @@ -0,0 +1,203 @@ +//===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Generating inliner statistics for imported functions, mostly useful for +// ThinLTO. +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +using namespace llvm; + +ImportedFunctionsInliningStatistics::InlineGraphNode & +ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) { + + auto &ValueLookup = NodesMap[&F]; + if (!ValueLookup) { + ValueLookup = llvm::make_unique(); + ValueLookup->Imported = F.getMetadata("thinlto_src_module") != nullptr; + ValueLookup->FunctionName = F.getName(); + } + return *ValueLookup; +} + +void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller, + const Function &Callee) { + + InlineGraphNode &CallerNode = createInlineGraphNode(Caller); + InlineGraphNode &CalleeNode = createInlineGraphNode(Callee); + CalleeNode.NumberOfInlines++; + + if (!CallerNode.Imported && !CalleeNode.Imported) { + // Direct inline from not imported callee to not imported caller, so we + // don't have to add this to graph. It might be very helpful if you wanna + // get the inliner statistics in compile step where there are no imported + // functions. In this case the graph would be empty. + CalleeNode.NumberOfRealInlines++; + return; + } + + CallerNode.InlinedCallees.push_back(&CalleeNode); + if (!CallerNode.Imported) + // Save Caller as a starting node for traversal. + NonImportedCallers.push_back(&Caller); +} + +static std::pair getFunctionsCount(const Module &M) { + int32_t AllFunctions = 0; + int32_t ImportedFunctions = 0; + // FIXME: We are not counting all the functions that were inlined because + // some of them could be removed after inlining. + for (const auto &F : M.functions()) { + AllFunctions++; + ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr); + } + + return {AllFunctions, ImportedFunctions}; +} + +std::string getStatString(const char *Msg, int32_t Fraction, int32_t All, + const char *PercentageOfMsg, bool LineEnd = true) { + double Result = 0; + if (All != 0) + Result = 100 * static_cast(Fraction) / All; + + std::stringstream Str; + Str << std::setprecision(4) << Msg << ": " << Fraction << " [" << Result + << "% of " << PercentageOfMsg << "]"; + if (LineEnd) + Str << "\n"; + return Str.str(); +} + +void ImportedFunctionsInliningStatistics::dump(const bool Verbose, + const Module &M) { + calculateRealInlines(); + NonImportedCallers.clear(); + + int32_t InlinedImportedFunctionsCount = 0; + int32_t InlinedNotImportedFunctionsCount = 0; + + int32_t InlinedImportedFunctionsToImportingModuleCount = 0; + int32_t InlinedNotImportedFunctionsToImportingModuleCount = 0; + + int32_t AllFunctions, ImportedFunctions; + std::tie(AllFunctions, ImportedFunctions) = getFunctionsCount(M); + + const auto SortedNodes = getSortedNodes(); + std::string Out; + Out.reserve(5000); + raw_string_ostream Ostream(Out); + + Ostream << "------- Dumping inliner stats for [" << M.getName() + << "] -------\n"; + + if (Verbose) + Ostream << "-- List of inlined functions:\n"; + + for (const auto &Node : SortedNodes) { + assert(Node.second->NumberOfInlines >= Node.second->NumberOfRealInlines); + if (Node.second->NumberOfInlines == 0) + continue; + + if (Node.second->Imported) { + InlinedImportedFunctionsCount++; + InlinedImportedFunctionsToImportingModuleCount += + int(Node.second->NumberOfRealInlines > 0); + } else { + InlinedNotImportedFunctionsCount++; + InlinedNotImportedFunctionsToImportingModuleCount += + int(Node.second->NumberOfRealInlines > 0); + } + + if (Verbose) + Ostream << "Inlined " + << (Node.second->Imported ? "imported " : "not imported ") + << "function [" << Node.second->FunctionName << "]" + << ": #inlines = " << Node.second->NumberOfInlines + << ", #inlines_to_importing_module = " + << Node.second->NumberOfRealInlines << "\n"; + } + + auto InlinedFunctionsCount = + InlinedImportedFunctionsCount + InlinedNotImportedFunctionsCount; + auto NotImportedFuncCount = AllFunctions - ImportedFunctions; + auto ImportedNotInlinedIntoModule = + ImportedFunctions - InlinedImportedFunctionsToImportingModuleCount; + + Ostream << "-- Summary:\n" + << "All functions: " << AllFunctions + << ", imported functions: " << ImportedFunctions << "\n" + << getStatString("inlined functions", InlinedFunctionsCount, + AllFunctions, "all functions") + << getStatString("imported functions inlined anywhere", + InlinedImportedFunctionsCount, ImportedFunctions, + "imported functions") + << getStatString("imported functions inlined into importing module", + InlinedImportedFunctionsToImportingModuleCount, + ImportedFunctions, "imported functions", + /*LineEnd=*/false) + << getStatString(", remaining", ImportedNotInlinedIntoModule, + ImportedFunctions, "imported functions") + << getStatString("non-imported functions inlined anywhere", + InlinedNotImportedFunctionsCount, + NotImportedFuncCount, "non-imported functions") + << getStatString( + "non-imported functions inlined into importing module", + InlinedNotImportedFunctionsToImportingModuleCount, + NotImportedFuncCount, "non-imported functions"); + Ostream.flush(); + dbgs() << Out; +} + +void ImportedFunctionsInliningStatistics::calculateRealInlines() { + // Removing duplicated Callers. + std::sort(NonImportedCallers.begin(), NonImportedCallers.end()); + NonImportedCallers.erase( + std::unique(NonImportedCallers.begin(), NonImportedCallers.end()), + NonImportedCallers.end()); + + for (const auto *F : NonImportedCallers) + dfs(*NodesMap[F]); +} + +void ImportedFunctionsInliningStatistics::dfs(InlineGraphNode &GraphNode) { + GraphNode.Visited = true; + for (auto *const InlinedFunctionNode : GraphNode.InlinedCallees) { + InlinedFunctionNode->NumberOfRealInlines++; + if (!InlinedFunctionNode->Visited) + dfs(*InlinedFunctionNode); + } +} + +ImportedFunctionsInliningStatistics::SortedNodesTy +ImportedFunctionsInliningStatistics::getSortedNodes() { + SortedNodesTy SortedNodes(std::make_move_iterator(NodesMap.begin()), + std::make_move_iterator(NodesMap.end())); + NodesMap.clear(); + + std::sort( + SortedNodes.begin(), SortedNodes.end(), + [&](const SortedNodesTy::value_type &Lhs, + const SortedNodesTy::value_type &Rhs) { + if (Lhs.second->NumberOfInlines != Rhs.second->NumberOfInlines) + return Lhs.second->NumberOfInlines > Rhs.second->NumberOfInlines; + if (Lhs.second->NumberOfRealInlines != Rhs.second->NumberOfRealInlines) + return Lhs.second->NumberOfRealInlines > + Rhs.second->NumberOfRealInlines; + return Lhs.second->FunctionName < Rhs.second->FunctionName; + }); + return SortedNodes; +} Index: test/Transforms/Inline/inline_stats.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/inline_stats.ll @@ -0,0 +1,87 @@ +; RUN: opt -S -inline -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK +; RUN: opt -S -inline -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK + +; CHECK: ------- Dumping inliner stats for [] ------- +; CHECK-BASIC-NOT: -- List of inlined functions: +; CHECK-BASIC-NOT: -- Inlined not imported function +; CHECK-VERBOSE: -- List of inlined functions: +; CHECK-VERBOSE: Inlined not imported function [internal2]: #inlines = 6, #inlines_to_importing_module = 2 +; CHECK-VERBOSE: Inlined imported function [external2]: #inlines = 4, #inlines_to_importing_module = 1 +; CHECK-VERBOSE: Inlined imported function [external1]: #inlines = 3, #inlines_to_importing_module = 2 +; CHECK-VERBOSE: Inlined imported function [external5]: #inlines = 1, #inlines_to_importing_module = 1 +; CHECK-VERBOSE: Inlined imported function [external3]: #inlines = 1, #inlines_to_importing_module = 0 + +; CHECK: -- Summary: +; CHECK: All functions: 10, imported functions: 7 +; CHECK: inlined functions: 5 [50% of all functions] +; CHECK: imported functions inlined anywhere: 4 [57.14% of imported functions] +; CHECK: imported functions inlined into importing module: 3 [42.86% of imported functions], remaining: 4 [57.14% of imported functions] +; CHECK: non-imported functions inlined anywhere: 1 [33.33% of non-imported functions] +; CHECK: non-imported functions inlined into importing module: 1 [33.33% of non-imported functions] + +define void @internal() { + call fastcc void @external1() + call fastcc void @internal2() + call coldcc void @external_big() + ret void +} + +define void @internal2() alwaysinline { + ret void +} + +define void @internal3() { + call fastcc void @external1() + call fastcc void @external5() + ret void +} + +define void @external1() alwaysinline !thinlto_src_module !0 { + call fastcc void @internal2() + call fastcc void @external2(); + ret void +} + +define void @external2() alwaysinline !thinlto_src_module !1 { + ret void +} + +define void @external3() alwaysinline !thinlto_src_module !1 { + ret void +} + +define void @external4() !thinlto_src_module !1 { + call fastcc void @external1() + call fastcc void @external2() + ret void +} + +define void @external5() !thinlto_src_module !1 { + ret void +} + +; Assume big piece of code here. This function won't be inlined, so all the +; inlined function it will have won't affect real inlines. +define void @external_big() noinline !thinlto_src_module !1 { +; CHECK-NOT: call fastcc void @internal2() + call fastcc void @internal2() + call fastcc void @internal2() + call fastcc void @internal2() + call fastcc void @internal2() + +; CHECK-NOT: call fastcc void @external2() + call fastcc void @external2() + call fastcc void @external2() +; CHECK-NOT: call fastcc void @external3() + call fastcc void @external3() + ret void +} + +; It should not be imported, but it should not break anything. +define void @external_notcalled() !thinlto_src_module !0 { + call void @external_notcalled() + ret void +} + +!0 = !{!"file.cc"} +!1 = !{!"other.cc"}