This is an archive of the discontinued LLVM Phabricator instance.

FunctionImporter: implement bulk function importing for efficiency
AbandonedPublic

Authored by mehdi_amini on Dec 2 2015, 10:04 PM.

Download Raw Diff

Details

Reviewers

Summary

The current importing scheme processes one function at a time:
it loads the source Module, links the function into the destination
module, and destroys the source Module before repeating with the
next function to import (potentially from the same Module).

Ideally we would keep the source Module alive and import the next
Function needed from this Module. Unfortunately this is not possible
because the linker does not leave it in a usable state.

However, we can do better by first computing the list of all candidates
per Module, and only then loading each source Module and importing all
the functions we need from it.

We still need to repeat the process for callees of the imported
Function. This is avoidable with another alternative scheme where
we would load the source Module, materialize the Function, and
add the callees to the Worklist without actually importing the
Function. The import would take place in the end when we're done
with computing the import set.

This patch already improves the link time considerably.
A multithreaded link of llvm-dis on my laptop previously took:

real  1m12.175s  user  6m32.430s sys  0m10.529s

and is now:

real  0m47.400s  user  3m1.551s  sys  0m5.825s

Note: this is the full link time (linker+Import+Optimizer+CodeGen)

Diff Detail

Event Timeline

mehdi_amini updated this revision to Diff 41719.Dec 2 2015, 10:04 PM

mehdi_amini retitled this revision from to FunctionImporter: implement bulk function importing for efficiency.

mehdi_amini updated this object.

mehdi_amini added a reviewer: tejohnson.

mehdi_amini added subscribers: llvm-commits, dexonsmith.

Obsolete per D15178.

mehdi_amini mentioned this in D15178: FunctionImporter: implement bulk function importing for efficiency.Dec 2 2015, 11:18 PM

Revision Contents

Path

Size

lib/

Transforms/

IPO/

FunctionImport.cpp

144 lines

Diff 41719

lib/Transforms/IPO/FunctionImport.cpp

Show All 18 Lines
#include "llvm/IR/IntrinsicInst.h"		#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"		#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"		#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"		#include "llvm/Linker/Linker.h"
#include "llvm/Object/FunctionIndexObjectFile.h"		#include "llvm/Object/FunctionIndexObjectFile.h"
#include "llvm/Support/CommandLine.h"		#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"		#include "llvm/Support/Debug.h"
#include "llvm/Support/SourceMgr.h"		#include "llvm/Support/SourceMgr.h"

using namespace llvm;		using namespace llvm;

#define DEBUG_TYPE "function-import"		#define DEBUG_TYPE "function-import"

/// Limit on instruction count of imported functions.		/// Limit on instruction count of imported functions.
static cl::opt<unsigned> ImportInstrLimit(		static cl::opt<unsigned> ImportInstrLimit(
"import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),		"import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
cl::desc("Only import functions with less than N instructions"));		cl::desc("Only import functions with less than N instructions"));
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines	for (auto &I : BB) {
Worklist.push_back(CalledFunction->getName());		Worklist.push_back(CalledFunction->getName());
}		}
}		}
}		}
}		}
}		}

// Helper function: given a worklist and an index, will process all the worklist		// Helper function: given a worklist and an index, will process all the worklist
// and import them based on the summary information		// and decide which function to add to the import list
static unsigned ProcessImportWorklist(		static unsigned GetImportList(SmallVector<StringRef, 64> &Worklist,
Module &DestModule, SmallVector<StringRef, 64> &Worklist,		StringMap<DenseSet<StringRef>>
StringSet<> &CalledFunctions, Linker &TheLinker,		&ModuleToFunctionsToImportMap,
const FunctionInfoIndex &Index,		const FunctionInfoIndex &Index) {
std::function<Module &(StringRef FileName)> &LazyModuleLoader) {
unsigned ImportCount = 0;		unsigned ImportCount = 0;
while (!Worklist.empty()) {		while (!Worklist.empty()) {
auto CalledFunctionName = Worklist.pop_back_val();		auto CalledFunctionName = Worklist.pop_back_val();
DEBUG(dbgs() << "Process import for " << CalledFunctionName << "\n");		DEBUG(dbgs() << "Process import for " << CalledFunctionName << "\n");

// Try to get a summary for this function call.		// Try to get a summary for this function call.
auto InfoList = Index.findFunctionInfoList(CalledFunctionName);		auto InfoList = Index.findFunctionInfoList(CalledFunctionName);
if (InfoList == Index.end()) {		if (InfoList == Index.end()) {
Show All 18 Lines	while (!Worklist.empty()) {
if (Summary->instCount() > ImportInstrLimit) {		if (Summary->instCount() > ImportInstrLimit) {
DEBUG(dbgs() << "Skip import of " << CalledFunctionName << " with "		DEBUG(dbgs() << "Skip import of " << CalledFunctionName << " with "
<< Summary->instCount() << " instructions (limit "		<< Summary->instCount() << " instructions (limit "
<< ImportInstrLimit << ")\n");		<< ImportInstrLimit << ")\n");
continue;		continue;
}		}

// Get the module path from the summary.		// Get the module path from the summary.
auto FileName = Summary->modulePath();		auto ModuleIdentifier = Summary->modulePath();
DEBUG(dbgs() << "Importing " << CalledFunctionName << " from " << FileName		DEBUG(dbgs() << "Importing " << CalledFunctionName << " from "
<< "\n");		<< ModuleIdentifier << "\n");

// Get the module for the import (potentially from the cache).		// Add the function name to the list of import for this module
auto &Module = LazyModuleLoader(FileName);		ModuleToFunctionsToImportMap[ModuleIdentifier].insert(CalledFunctionName);
assert(&Module.getContext() == &DestModule.getContext());
		++ImportCount;
		}
		return ImportCount;
		}

		static unsigned ImportFunctions(Module &SrcModule,
		DenseSet<StringRef> FunctionNamesToImport,
		Linker &TheLinker,
		const FunctionInfoIndex &Index) {
		// Set of functions to import
		DenseSet<const GlobalValue *> FunctionsToImport;

		for (auto ImportFunctionName : FunctionNamesToImport) {
// The function that we will import!		// The function that we will import!
GlobalValue *SGV = Module.getNamedValue(CalledFunctionName);		GlobalValue *SGV = SrcModule.getNamedValue(ImportFunctionName);
StringRef ImportFunctionName = CalledFunctionName;
if (!SGV) {		if (!SGV) {
// Might be local in source Module, promoted/renamed in DestModule.		// The destination module is referencing function using their renamed name
		// when importing a function that was originally local in the source
		// module. The source module we have might not have been renamed so we try
		// to remove the suffix added during the renaming to recover the original
		// name in the source module.
std::pair<StringRef, StringRef> Split =		std::pair<StringRef, StringRef> Split =
CalledFunctionName.split(".llvm.");		ImportFunctionName.split(".llvm.");
SGV = Module.getNamedValue(Split.first);		SGV = SrcModule.getNamedValue(Split.first);
#ifndef NDEBUG		assert(SGV && "Can't find function to import in source module");
// Assert that Split.second is module id		}
uint64_t ModuleId;		if (!SGV) {
assert(!Split.second.getAsInteger(10, ModuleId));		report_fatal_error(Twine("Can't load function '") + ImportFunctionName +
assert(ModuleId == Index.getModuleId(FileName));		"' in Module '" + SrcModule.getModuleIdentifier() +
#endif		"', error in the summary?\n");
}		}

Function *F = dyn_cast<Function>(SGV);		Function *F = dyn_cast<Function>(SGV);
if (!F && isa<GlobalAlias>(SGV)) {		if (!F && isa<GlobalAlias>(SGV)) {
auto *SGA = dyn_cast<GlobalAlias>(SGV);		auto *SGA = dyn_cast<GlobalAlias>(SGV);
F = dyn_cast<Function>(SGA->getBaseObject());		F = dyn_cast<Function>(SGA->getBaseObject());
ImportFunctionName = F->getName();		ImportFunctionName = F->getName();
}		}
if (!F) {		assert(F && "Imported Function is ... not a Function");
errs() << "Can't load function '" << CalledFunctionName << "' in Module '"
<< FileName << "', error in the summary?\n";
llvm_unreachable("Can't load function in Module");
}

// We cannot import weak_any functions/aliases without possibly affecting		// We cannot import weak_any functions/aliases without possibly affecting
// the order they are seen and selected by the linker, changing program		// the order they are seen and selected by the linker, changing program
// semantics.		// semantics.
if (SGV->hasWeakAnyLinkage()) {		if (SGV->hasWeakAnyLinkage()) {
DEBUG(dbgs() << "Ignoring import request for weak-any "		DEBUG(dbgs() << "Ignoring import request for weak-any "
<< (isa<Function>(SGV) ? "function " : "alias ")		<< (isa<Function>(SGV) ? "function " : "alias ")
<< CalledFunctionName << " from " << FileName << "\n");		<< ImportFunctionName << " from "
		<< SrcModule.getModuleIdentifier() << "\n");
continue;		continue;
}		}

// Link in the specified function.		// Add the function to the import list
DenseSet<const GlobalValue *> FunctionsToImport;
FunctionsToImport.insert(F);		FunctionsToImport.insert(F);
if (TheLinker.linkInModule(Module, Linker::Flags::None, &Index,		}

		if (FunctionsToImport.empty())
		return 0;

		// Link in the specified functions.
		if (TheLinker.linkInModule(SrcModule, Linker::Flags::None, &Index,
&FunctionsToImport))		&FunctionsToImport))
report_fatal_error("Function Import: link error");		report_fatal_error("Function Import: link error");

// Process the newly imported function and add callees to the worklist.		return FunctionsToImport.size();
GlobalValue *NewGV = DestModule.getNamedValue(ImportFunctionName);
assert(NewGV);
Function *NewF = dyn_cast<Function>(NewGV);
assert(NewF);
findExternalCalls(*NewF, CalledFunctions, Worklist);
++ImportCount;
}
return ImportCount;
}		}

// Automatically import functions in Module \p DestModule based on the summaries		// Automatically import functions in Module \p DestModule based on the summaries
// index.		// index.
//		//
// The current implementation imports every called functions that exists in the		// The current implementation imports every called functions that exists in the
// summaries index.		// summaries index.
bool FunctionImporter::importFunctions(Module &DestModule) {		bool FunctionImporter::importFunctions(Module &DestModule) {
Show All 12 Lines	bool FunctionImporter::importFunctions(Module &DestModule) {
if (Worklist.empty())		if (Worklist.empty())
return false;		return false;

/// Second step: for every call to an external function, try to import it.		/// Second step: for every call to an external function, try to import it.

// Linker that will be used for importing function		// Linker that will be used for importing function
Linker TheLinker(DestModule, DiagnosticHandler);		Linker TheLinker(DestModule, DiagnosticHandler);

ImportedCount += ProcessImportWorklist(DestModule, Worklist, CalledFunctions,		// For each iteration, GetImportList() will flush the Worklist and
TheLinker, Index, getLazyModule);		// populate ModuleToFunctionsToImportMap with the function we really want
		// to import.
		// Then we'll process one module at a time, importing the list of functions
		// for this module.
		// Finally, the imported function will be analyzed and the worklist will be
		// populated with the set of candidates.
		while (!Worklist.empty()) {
		// Map of Module -> List of Function to import from the Module
		StringMap<DenseSet<StringRef>> ModuleToFunctionsToImportMap;

		// Analyze the summaries and get the list of functions to import by
		// populating ModuleToFunctionsToImportMap
		GetImportList(Worklist, ModuleToFunctionsToImportMap, Index);
		assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList");

		// Do the actual import of functions now, one Module at a time
		for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) {
		// Get the module for the import
		// Can't cache it for now because the linker mess it up.
		auto &FunctionsToImport = FunctionsToImportPerModule.second;
		const auto &ModuleName = FunctionsToImportPerModule.first();
		auto &SrcModule = getLazyModule(ModuleName);
		assert(&DestModule.getContext() == &SrcModule.getContext() &&
		"Context mismatch");

		ImportedCount +=
		ImportFunctions(SrcModule, FunctionsToImport, TheLinker, Index);

		/// Make functions as processed so we don't try to reimport them
		CalledFunctions.insert(FunctionsToImport.begin(),
		FunctionsToImport.end());

		// Process the newly imported functions and add callees to the worklist.
		for (const auto &ImportedFunction : FunctionsToImport) {
		GlobalValue *NewGV =
		DestModule.getNamedValue(ImportedFunction);
		assert(NewGV && "Can't locate newly imported function");
		Function *NewF = dyn_cast<Function>(NewGV);
		assert(NewF && "Imported Function is ... not a Function?");
		findExternalCalls(*NewF, CalledFunctions, Worklist);
		}
		}
		}
DEBUG(errs() << "Imported " << ImportedCount << " functions for Module "		DEBUG(errs() << "Imported " << ImportedCount << " functions for Module "
<< DestModule.getModuleIdentifier() << "\n");		<< DestModule.getModuleIdentifier() << "\n");
return ImportedCount;		return ImportedCount;
}		}

/// Summary file to use for function importing when using -function-import from		/// Summary file to use for function importing when using -function-import from
/// the command line.		/// the command line.
static cl::opt<std::string>		static cl::opt<std::string>
▲ Show 20 Lines • Show All 74 Lines • Show Last 20 Lines