Index: include/llvm/Transforms/Utils/ThinLTOInlinerStats.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Utils/ThinLTOInlinerStats.h @@ -0,0 +1,27 @@ +//===-- ThinLTOInlinerStats.h - Generating inliner statistics ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Generating inliner statistics for imported functions. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_THINLTOINLINERSTATS_H +#define LLVM_TRANSFORMS_UTILS_THINLTOINLINERSTATS_H + +namespace llvm { +class Function; + +/// Record inline of \p Callee to \p Caller for statistics. +void recordInline(const Function &Caller, const Function &Callee); +/// Dump the statistics collected by recordInline(). +/// If \p Verbose is true then separate statistics for every inlined function +/// will be printed. +void dumpInlinerStats(bool Verbose); + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_THINLTOINLINERSTATS_H Index: lib/Transforms/IPO/Inliner.cpp =================================================================== --- lib/Transforms/IPO/Inliner.cpp +++ lib/Transforms/IPO/Inliner.cpp @@ -33,6 +33,7 @@ #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ThinLTOInlinerStats.h" using namespace llvm; #define DEBUG_TYPE "inline" @@ -47,6 +48,24 @@ // if those would be more profitable and blocked inline steps. 
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); +namespace { +enum class InlinerFunctionImportStatsOpts { + No = 0, + Basic = 1, + Verbose = 2, +}; + +cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats( + "inliner-function-import-stats", + cl::init(InlinerFunctionImportStatsOpts::No), + cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic", + "basic statistics"), + clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose", + "printing of statistics for each inlined function"), + clEnumValEnd), + cl::Hidden, cl::desc("Enable ThinLTO specific inliner stats")); +} // namespace + Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {} Inliner::Inliner(char &ID, bool InsertLifetime) @@ -63,7 +82,6 @@ CallGraphSCCPass::getAnalysisUsage(AU); } - typedef DenseMap > InlinedArrayAllocasTy; @@ -78,6 +96,7 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime) { + // Callee and Caller will be set to null in CS after inlining. Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); @@ -94,6 +113,9 @@ if (!InlineFunction(CS, IFI, &AAR, InsertLifetime)) return false; + if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) + recordInline(*Caller, *Callee); + AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); // Look at all of the allocas that we inlined through this call site. If we @@ -568,6 +590,9 @@ /// Remove now-dead linkonce functions at the end of /// processing to avoid breaking the SCC traversal. 
bool Inliner::doFinalization(CallGraph &CG) {
+  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
+    dumpInlinerStats(InlinerFunctionImportStats ==
+                     InlinerFunctionImportStatsOpts::Verbose);
   return removeDeadFunctions(CG);
 }
 
Index: lib/Transforms/Utils/CMakeLists.txt
===================================================================
--- lib/Transforms/Utils/CMakeLists.txt
+++ lib/Transforms/Utils/CMakeLists.txt
@@ -16,6 +16,7 @@
   FunctionImportUtils.cpp
   GlobalStatus.cpp
   InlineFunction.cpp
+  ThinLTOInlinerStats.cpp
   InstructionNamer.cpp
   IntegerDivision.cpp
   LCSSA.cpp
Index: lib/Transforms/Utils/ThinLTOInlinerStats.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Utils/ThinLTOInlinerStats.cpp
@@ -0,0 +1,251 @@
+//===-- ThinLTOInlinerStats.cpp - Generating inliner statistics -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Generating inliner statistics for imported functions.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ThinLTOInlinerStats.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <mutex>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+namespace {
+
+/// \brief Calculate and dump ThinLTO specific inliner stats.
+/// The main statistics are:
+/// (1) Number of inlined imported functions,
+/// (2) Number of imported functions inlined into importing module (indirect),
+/// (3) Number of non imported functions inlined into importing module
+///     (indirect).
+/// The difference between first and the second is that first stat counts
+/// all performed inlines on imported functions, but the second one only the
+/// functions that have been eventually inlined to a function in the importing
+/// module (by a chain of inlines). Because llvm uses bottom-up inliner, it is
+/// possible to e.g. import function `A`, `B` and then inline `B` to `A`,
+/// and after this `A` might be too big to be inlined into some other function
+/// that calls it. It calculates this statistic by building graph, where
+/// the nodes are functions, and edges are performed inlines and then by marking
+/// the edges starting from not imported function.
+///
+/// If `Verbose` is set to true, then it also dumps statistics
+/// per each inlined function, sorted by the greatest inlines count like
+/// - number of performed inlines
+/// - number of performed inlines to importing module
+class ThinLTOInlinerStatistics {
+private:
+  /// InlineGraphNode represents node in graph of inlined functions.
+  struct InlineGraphNode {
+    InlineGraphNode() = default;
+    // Nodes are heap-allocated and referenced by address from other nodes'
+    // InlinedCallees lists, so they must never be copied.
+    InlineGraphNode(const InlineGraphNode &) = delete;
+    InlineGraphNode &operator=(const InlineGraphNode &) = delete;
+
+    /// Nodes of the functions that were inlined directly into this one.
+    llvm::SmallVector<InlineGraphNode *, 8> InlinedCallees;
+    /// Incremented every direct inline.
+    int32_t NumberOfInlines = 0;
+    /// Number of inlines into non imported function (possibly indirect via
+    /// intermediate inlines). Computed based on graph search.
+    int32_t NumberOfRealInlines = 0;
+    bool Imported = false;
+    bool Visited = false;
+  };
+
+  /// Nodes are owned through unique_ptr so that their addresses stay stable
+  /// when the map grows: DenseMap moves its buckets on insertion, and
+  /// InlinedCallees holds raw pointers to other nodes.
+  using NodesMapTy =
+      llvm::DenseMap<const Function *, std::unique_ptr<InlineGraphNode>>;
+  using SortedNodeTy =
+      std::pair<const Function *, std::unique_ptr<InlineGraphNode>>;
+  using SortedNodesTy = std::vector<SortedNodeTy>;
+  friend ThinLTOInlinerStatistics &getInlinerStatistics();
+
+public:
+  /// Record that \p Callee has been inlined into \p Caller.
+  void addInlinedFunction(const Function &Caller, const Function &Callee);
+  /// Compute the indirect inline counts, print the statistics to dbgs() and
+  /// reset the collected data. Per-function lines are printed only if
+  /// \p Verbose is set.
+  void dumpStats(bool Verbose);
+
+private:
+  ThinLTOInlinerStatistics();
+  ThinLTOInlinerStatistics(const ThinLTOInlinerStatistics &) = delete;
+  /// Return the node of \p F, creating it on first use.
+  InlineGraphNode &getOrCreateNode(const Function &F);
+  void calculateRealInlines();
+  void dfs(InlineGraphNode *GraphNode);
+  /// Clears NodesMap and returns vector of elements sorted by
+  /// (-NumberOfInlines, -NumberOfRealInlines, FunctionName).
+  SortedNodesTy getSortedNodes();
+
+private:
+  /// NOTE(review): the keys are dereferenced again (getName) when the stats
+  /// are dumped; confirm that no recorded Function can be erased before then.
+  NodesMapTy NodesMap;
+  /// Non external functions that have some other function inlined inside.
+  std::vector<const Function *> NonImportedCallers;
+  /// The mutex is to minimize risk and prevent future bugs.
+  /// It might not be required to work right now.
+  mutable std::mutex Mutex;
+};
+
+ThinLTOInlinerStatistics::ThinLTOInlinerStatistics() {
+  NonImportedCallers.reserve(200);
+}
+
+ThinLTOInlinerStatistics::InlineGraphNode &
+ThinLTOInlinerStatistics::getOrCreateNode(const Function &F) {
+  std::unique_ptr<InlineGraphNode> &Slot = NodesMap[&F];
+  if (!Slot) {
+    Slot.reset(new InlineGraphNode());
+    Slot->Imported = F.getMetadata("thinlto_src_module") != nullptr;
+  }
+  // Only the heap node escapes; Slot itself may move on the next insertion.
+  return *Slot;
+}
+
+void ThinLTOInlinerStatistics::addInlinedFunction(const Function &Caller,
+                                                  const Function &Callee) {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  InlineGraphNode &CallerNode = getOrCreateNode(Caller);
+  InlineGraphNode &CalleeNode = getOrCreateNode(Callee);
+  CalleeNode.NumberOfInlines++;
+
+  if (!CallerNode.Imported && !CalleeNode.Imported) {
+    // Direct inline from not imported callee to not imported caller, so we
+    // don't have to add this to graph. It might be very helpful if you wanna
+    // get the inliner statistics in compile step where there are no imported
+    // functions. In this case the graph would be empty.
+    CalleeNode.NumberOfRealInlines++;
+    return;
+  }
+
+  CallerNode.InlinedCallees.push_back(&CalleeNode);
+  if (!CallerNode.Imported)
+    // Save Caller as a starting node for traversal.
+    NonImportedCallers.push_back(&Caller);
+}
+
+void ThinLTOInlinerStatistics::dumpStats(const bool Verbose) {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  calculateRealInlines();
+  NonImportedCallers.clear();
+
+  int32_t InlinedImportedFunctionsCount = 0;
+  int32_t InlinedImportedFunctionsToImportingModuleCount = 0;
+  int32_t InlinedNotImportedFunctionsToImportingModuleCount = 0;
+
+  const SortedNodesTy SortedNodes = getSortedNodes();
+  dbgs() << "------- Dumping inliner stats -------\n";
+  for (const SortedNodeTy &Entry : SortedNodes) {
+    const InlineGraphNode &Node = *Entry.second;
+    assert(Node.NumberOfInlines >= Node.NumberOfRealInlines);
+    if (Node.NumberOfInlines == 0)
+      continue;
+
+    const bool ReachedImportingModule = Node.NumberOfRealInlines > 0;
+    if (Node.Imported) {
+      ++InlinedImportedFunctionsCount;
+      if (ReachedImportingModule)
+        ++InlinedImportedFunctionsToImportingModuleCount;
+    } else if (ReachedImportingModule) {
+      // Accumulate per function; plain assignment here would cap the total
+      // at 1 no matter how many functions qualify.
+      ++InlinedNotImportedFunctionsToImportingModuleCount;
+    }
+
+    if (Verbose)
+      dbgs() << "Inlined " << (Node.Imported ? "imported " : "not imported ")
+             << "function [" << Entry.first->getName() << "]"
+             << ": #inlines = " << Node.NumberOfInlines
+             << ", #inlines_to_importing_module = "
+             << Node.NumberOfRealInlines << "\n";
+  }
+
+  dbgs() << "Number of inlined imported functions: "
+         << InlinedImportedFunctionsCount
+         << "\nNumber of imported functions inlined into importing module: "
+         << InlinedImportedFunctionsToImportingModuleCount
+         << "\nNumber of non-imported functions inlined into importing module: "
+         << InlinedNotImportedFunctionsToImportingModuleCount << "\n";
+}
+
+void ThinLTOInlinerStatistics::calculateRealInlines() {
+  // Removing duplicated Callers.
+  std::sort(NonImportedCallers.begin(), NonImportedCallers.end());
+  NonImportedCallers.erase(
+      std::unique(NonImportedCallers.begin(), NonImportedCallers.end()),
+      NonImportedCallers.end());
+
+  for (const Function *F : NonImportedCallers) {
+    const auto It = NodesMap.find(F);
+    assert(It != NodesMap.end() && "recorded caller has no graph node");
+    dfs(It->second.get());
+  }
+}
+
+void ThinLTOInlinerStatistics::dfs(InlineGraphNode *const GraphNode) {
+  GraphNode->Visited = true;
+  for (auto *const InlinedFunctionNode : GraphNode->InlinedCallees) {
+    InlinedFunctionNode->NumberOfRealInlines++;
+    if (!InlinedFunctionNode->Visited)
+      dfs(InlinedFunctionNode);
+  }
+}
+
+ThinLTOInlinerStatistics::SortedNodesTy
+ThinLTOInlinerStatistics::getSortedNodes() {
+  SortedNodesTy SortedNodes;
+  SortedNodes.reserve(NodesMap.size());
+  for (auto &Entry : NodesMap)
+    SortedNodes.emplace_back(Entry.first, std::move(Entry.second));
+  NodesMap.clear();
+
+  std::sort(SortedNodes.begin(), SortedNodes.end(),
+            [](const SortedNodeTy &Lhs, const SortedNodeTy &Rhs) {
+              if (Lhs.second->NumberOfInlines != Rhs.second->NumberOfInlines)
+                return Lhs.second->NumberOfInlines >
+                       Rhs.second->NumberOfInlines;
+              if (Lhs.second->NumberOfRealInlines !=
+                  Rhs.second->NumberOfRealInlines)
+                return Lhs.second->NumberOfRealInlines >
+                       Rhs.second->NumberOfRealInlines;
+              return Lhs.first->getName() < Rhs.first->getName();
+            });
+  return SortedNodes;
+}
+
+ThinLTOInlinerStatistics &getInlinerStatistics() {
+  static ThinLTOInlinerStatistics Stats;
+  return Stats;
+}
+
+} // End of anonymous namespace
+
+void recordInline(const Function &Caller, const Function &Callee) {
+  getInlinerStatistics().addInlinedFunction(Caller, Callee);
+}
+
+void dumpInlinerStats(bool Verbose) {
+  getInlinerStatistics().dumpStats(Verbose);
+}
+
+} // End of namespace llvm
Index: test/Transforms/Inline/inline_stats.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/inline_stats.ll
@@ -0,0 +1,87 @@
+; RUN: opt -S -inline -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s
+
+; CHECK: ------- Dumping inliner stats -------
+; CHECK: Inlined not imported function [internal2]: #inlines = 6, #inlines_to_importing_module = 2
+; CHECK: Inlined imported function [external2]: #inlines = 4, #inlines_to_importing_module = 1
+; CHECK: Inlined imported function [external1]: #inlines = 3, #inlines_to_importing_module = 2
+; CHECK: Inlined imported function [external5]: #inlines = 1, #inlines_to_importing_module = 1
+; CHECK: Inlined not imported function [internal4]: #inlines = 1, #inlines_to_importing_module = 1
+; CHECK: Inlined imported function [external3]: #inlines = 1, #inlines_to_importing_module = 0
+
+; CHECK: Number of inlined imported functions: 4
+; CHECK: Number of imported functions inlined into importing module: 3
+; CHECK: Number of non-imported functions inlined into importing module: 2
+
+define void @internal() {
+  call fastcc void @external1()
+  call fastcc void @internal2()
+  call coldcc void @external_big()
+  ret void
+}
+
+define void @internal2() alwaysinline {
+  ret void
+}
+
+define void @internal3() {
+  call fastcc void @external1()
+  call fastcc void @external5()
+  call void @internal4()
+  ret void
+}
+
+; A second non-imported function inlined into the importing module, so that
+; the last counter has to accumulate over more than one function.
+define void @internal4() alwaysinline {
+  ret void
+}
+
+define void @external1() alwaysinline !thinlto_src_module !0 {
+  call fastcc void @internal2()
+  call fastcc void @external2();
+  ret void
+}
+
+define void @external2() alwaysinline !thinlto_src_module !1 {
+  ret void
+}
+
+define void @external3() alwaysinline !thinlto_src_module !1 {
+  ret void
+}
+
+define void @external4() !thinlto_src_module !1 {
+  call fastcc void @external1()
+  call fastcc void @external2()
+  ret void
+}
+
+define void @external5() !thinlto_src_module !1 {
+  ret void
+}
+
+; Assume a big piece of code here. This function won't be inlined, so the
+; functions inlined into it won't affect real inlines.
+define void @external_big() noinline !thinlto_src_module !1 {
+; CHECK-NOT: call fastcc void @internal2()
+  call fastcc void @internal2()
+  call fastcc void @internal2()
+  call fastcc void @internal2()
+  call fastcc void @internal2()
+
+; CHECK-NOT: call fastcc void @external2()
+  call fastcc void @external2()
+  call fastcc void @external2()
+; CHECK-NOT: call fastcc void @external3()
+  call fastcc void @external3()
+  ret void
+}
+
+; It should not be imported, but it should not break anything.
+define void @external_notcalled() !thinlto_src_module !0 {
+  call void @external_notcalled()
+  ret void
+}
+
+!0 = !{!"file.cc"}
+!1 = !{!"other.cc"}