Index: include/llvm/LTO/ThinLTOCodeGenerator.h =================================================================== --- include/llvm/LTO/ThinLTOCodeGenerator.h +++ include/llvm/LTO/ThinLTOCodeGenerator.h @@ -201,6 +201,11 @@ void crossModuleImport(Module &Module, ModuleSummaryIndex &Index); /** + * Perform internalization. + */ + void internalize(Module &Module, ModuleSummaryIndex &Index); + + /** * Perform post-importing ThinLTO optimizations. */ void optimize(Module &Module); Index: lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- lib/LTO/ThinLTOCodeGenerator.cpp +++ lib/LTO/ThinLTOCodeGenerator.cpp @@ -17,6 +17,8 @@ #ifdef HAVE_LLVM_REVISION #include "LLVMLTORevision.h" #endif + +#include "UpdateCompilerUsed.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" @@ -32,6 +34,7 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CachePruning.h" @@ -44,6 +47,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionImport.h" +#include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" @@ -309,6 +313,77 @@ PM.run(TheModule); } +// Create a DenseSet of GlobalValue to be used with the Internalizer. +static DenseSet computePreservedSymbolsForModule( + Module &TheModule, const DenseSet &GUIDPreservedSymbols, + const FunctionImporter::ExportSetTy &ExportList) { + DenseSet PreservedGV; + if (GUIDPreservedSymbols.empty()) + // Early exit: internalize is disabled when there is nothing to preserve. + return PreservedGV; + + auto AddPreserveGV = [&](const GlobalValue &GV) { + auto GUID = GV.getGUID(); + if (GUIDPreservedSymbols.count(GUID) || ExportList.count(GUID)) + PreservedGV.insert(&GV); + }; + + for (auto &GV : TheModule) + AddPreserveGV(GV); + for (auto &GV : TheModule.globals()) + AddPreserveGV(GV); + for (auto &GV : TheModule.aliases()) + AddPreserveGV(GV); + + return PreservedGV; +} + +// Run internalization on \p TheModule +static void +doInternalizeModule(Module &TheModule, const TargetMachine &TM, + const DenseSet &PreservedGV) { + if (PreservedGV.empty()) { + // Be friendly and don't nuke totally the module when the client didn't + // supply anything to preserve. + return; + } + + // Parse inline ASM and collect the list of symbols that are not defined in + // the current module. + StringSet<> AsmUndefinedRefs; + object::IRObjectFile::CollectAsmUndefinedRefs( + Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(), + [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) { + if (Flags & object::BasicSymbolRef::SF_Undefined) + AsmUndefinedRefs.insert(Name); + }); + + // Update the llvm.compiler_used globals to force preserving libcalls and + // symbols referenced from asm + UpdateCompilerUsed(TheModule, TM, AsmUndefinedRefs); + + // Declare a callback for the internalize pass that will ask for every + // candidate GlobalValue if it can be internalized or not. + auto MustPreserveGV = + [&](const GlobalValue &GV) -> bool { return PreservedGV.count(&GV); }; + + llvm::internalizeModule(TheModule, MustPreserveGV); +} + +// Convert the PreservedSymbols map from "Name" based to "GUID" based. +static DenseSet +computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols, + const Triple &TheTriple) { + DenseSet GUIDPreservedSymbols(PreservedSymbols.size()); + for (auto &Entry : PreservedSymbols) { + StringRef Name = Entry.first(); + if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_') + Name = Name.drop_front(); + GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name)); + } + return GUIDPreservedSymbols; +} + std::unique_ptr codegenModule(Module &TheModule, TargetMachine &TM) { SmallVector OutputBuffer; @@ -395,6 +470,9 @@ sys::path::append(EntryPath, CachePath, toHex(Hasher.result())); } + // Access the path to this entry in the cache. + StringRef getEntryPath() { return EntryPath; } + // Try loading the buffer for this cache entry. ErrorOr> tryLoadingBuffer() { if (EntryPath.empty()) @@ -429,6 +507,8 @@ Module &TheModule, const ModuleSummaryIndex &Index, StringMap &ModuleMap, TargetMachine &TM, const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const DenseSet &GUIDPreservedSymbols, std::map &ResolvedODR, ThinLTOCodeGenerator::CachingOptions CacheOptions, bool DisableCodeGen, StringRef SaveTempsDir, unsigned count) { @@ -436,6 +516,13 @@ // Save temps: after IPO. saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc"); + // Prepare for internalization by computing the set of symbols to preserve. + // We need to compute the list of symbols to preserve during internalization + // before doing any promotion because after renaming we won't (easily) match + // to the original name. + auto PreservedGV = computePreservedSymbolsForModule( + TheModule, GUIDPreservedSymbols, ExportList); + // "Benchmark"-like optimization: single-source case bool SingleModule = (ModuleMap.size() == 1); @@ -449,16 +536,24 @@ // Save temps: after promotion. saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc"); + } + + // Internalization + doInternalizeModule(TheModule, TM, PreservedGV); + // Save internalized bitcode + saveTempBitcode(TheModule, SaveTempsDir, count, ".3.internalized.bc"); + + if (!SingleModule) { crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); // Save temps: after cross-module import. - saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); + saveTempBitcode(TheModule, SaveTempsDir, count, ".4.imported.bc"); } optimizeModule(TheModule, TM); - saveTempBitcode(TheModule, SaveTempsDir, count, ".3.opt.bc"); + saveTempBitcode(TheModule, SaveTempsDir, count, ".5.opt.bc"); if (DisableCodeGen) { // Configured to stop before CodeGen, serialize the bitcode and return. @@ -516,7 +611,10 @@ } void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) { - CrossReferencedSymbols.insert(Name); + // FIXME: At the moment, we don't take advantage of this extra information, + // we're conservatively considering cross-references as preserved. + // CrossReferencedSymbols.insert(Name); + PreservedSymbols.insert(Name); } // TargetMachine factory @@ -620,10 +718,43 @@ } /** + * Perform internalization. + */ +void ThinLTOCodeGenerator::internalize(Module &TheModule, + ModuleSummaryIndex &Index) { + initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); + auto ModuleCount = Index.modulePaths().size(); + auto ModuleIdentifier = TheModule.getModuleIdentifier(); + + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = + computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); + + // Collect for each module the list of function it defines (GUID -> Summary). + StringMap> + ModuleToDefinedGVSummaries(ModuleCount); + Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + + // Generate import/export list + StringMap ImportLists(ModuleCount); + StringMap ExportLists(ModuleCount); + ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, + ExportLists); + auto &ExportList = ExportLists[ModuleIdentifier]; + + // Internalization + auto PreservedGV = computePreservedSymbolsForModule( + TheModule, GUIDPreservedSymbols, ExportList); + doInternalizeModule(TheModule, *TMBuilder.create(), PreservedGV); +} + +/** * Perform post-importing ThinLTO optimizations. */ void ThinLTOCodeGenerator::optimize(Module &TheModule) { initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); + + // Optimize now optimizeModule(TheModule, *TMBuilder.create()); } @@ -694,10 +825,9 @@ ExportLists); // Convert the preserved symbols set from string to GUID, this is needed for - // computing the caching. - DenseSet GUIDPreservedSymbols(PreservedSymbols.size()); - for (auto &Entry : PreservedSymbols) - GUIDPreservedSymbols.insert(GlobalValue::getGUID(Entry.first())); + // computing the caching hash and the internalization. + auto GUIDPreservedSymbols = + computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); // Parallel optimizer + codegen { @@ -714,18 +844,21 @@ // We use a std::map here to be able to have a defined ordering when // producing a hash for the cache entry. std::map ResolvedODR; - ResolveODR(*Index, ExportList, DefinedFunctions, - ModuleIdentifier, ResolvedODR); + ResolveODR(*Index, ExportList, DefinedFunctions, ModuleIdentifier, + ResolvedODR); // The module may be cached, this helps handling it. - ModuleCacheEntry CacheEntry( - CacheOptions.Path, *Index, ModuleBuffer.getBufferIdentifier(), - ImportLists[ModuleBuffer.getBufferIdentifier()], - ExportLists[ModuleBuffer.getBufferIdentifier()], ResolvedODR, - DefinedFunctions, GUIDPreservedSymbols); + ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier, + ImportLists[ModuleIdentifier], ExportList, + ResolvedODR, DefinedFunctions, + GUIDPreservedSymbols); { auto ErrOrBuffer = CacheEntry.tryLoadingBuffer(); + DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '" + << CacheEntry.getEntryPath() << "' for buffer " << count + << " " << ModuleIdentifier << "\n"); + if (ErrOrBuffer) { // Cache Hit! ProducedBinaries[count] = std::move(ErrOrBuffer.get()); @@ -741,14 +874,14 @@ auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false); // Save temps: original file. - if (!SaveTempsDir.empty()) { - saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); - } + saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); auto &ImportList = ImportLists[ModuleIdentifier]; + // Run the main process now, and generates a binary auto OutputBuffer = ProcessThinLTOModule( *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList, - ResolvedODR, CacheOptions, DisableCodeGen, SaveTempsDir, count); + ExportList, GUIDPreservedSymbols, ResolvedODR, CacheOptions, + DisableCodeGen, SaveTempsDir, count); CacheEntry.write(*OutputBuffer); ProducedBinaries[count] = std::move(OutputBuffer); Index: test/ThinLTO/X86/internalize.ll =================================================================== --- /dev/null +++ test/ThinLTO/X86/internalize.ll @@ -0,0 +1,19 @@ +;; RUN: opt -module-summary %s -o %t1.bc +; RUN: llvm-lto -thinlto-action=thinlink -o %t.index.bc %t1.bc +; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=REGULAR +; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - --exported-symbol=foo | llvm-dis -o - | FileCheck %s --check-prefix=INTERNALIZE + +; REGULAR: define void @foo +; REGULAR: define void @bar +; INTERNALIZE: define void @foo +; INTERNALIZE: define internal void @bar + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +define void @foo() { + ret void +} +define void @bar() { + ret void +} \ No newline at end of file Index: tools/llvm-lto/llvm-lto.cpp =================================================================== --- tools/llvm-lto/llvm-lto.cpp +++ tools/llvm-lto/llvm-lto.cpp @@ -68,6 +68,7 @@ THINLINK, THINPROMOTE, THINIMPORT, + THININTERNALIZE, THINOPT, THINCODEGEN, THINALL @@ -84,6 +85,9 @@ clEnumValN(THINIMPORT, "import", "Perform both promotion and " "cross-module importing (requires " "-thinlto-index)."), + clEnumValN(THININTERNALIZE, "internalize", + "Perform internalization driven by -exported-symbol " + "(requires -thinlto-index)."), clEnumValN(THINOPT, "optimize", "Perform ThinLTO optimizations."), clEnumValN(THINCODEGEN, "codegen", "CodeGen (expected to match llc)"), clEnumValN(THINALL, "run", "Perform ThinLTO end-to-end"), @@ -105,10 +109,10 @@ cl::desc("Override output filename"), cl::value_desc("filename")); -static cl::list - ExportedSymbols("exported-symbol", - cl::desc("Symbol to export from the resulting object file"), - cl::ZeroOrMore); +static cl::list ExportedSymbols( + "exported-symbol", + cl::desc("List of symbols to export from the resulting object file"), + cl::ZeroOrMore); static cl::list DSOSymbols("dso-symbol", @@ -329,6 +333,10 @@ ThinLTOProcessing(const TargetOptions &Options) { ThinGenerator.setCodePICModel(RelocModel); ThinGenerator.setTargetOptions(Options); + + // Add all the exported symbols to the table of symbols to preserve. + for (unsigned i = 0; i < ExportedSymbols.size(); ++i) + ThinGenerator.preserveSymbol(ExportedSymbols[i]); } void run() { @@ -339,6 +347,8 @@ return promote(); case THINIMPORT: return import(); + case THININTERNALIZE: + return internalize(); case THINOPT: return optimize(); case THINCODEGEN: @@ -432,6 +442,37 @@ } } + void internalize() { + if (InputFilenames.size() != 1 && !OutputFilename.empty()) + report_fatal_error("Can't handle a single output filename and multiple " + "input files, do not provide an output filename and " + "the output files will be suffixed from the input " + "ones."); + + if (ExportedSymbols.empty()) + errs() << "Warning: -internalize will not perform without " + "-exported-symbol\n"; + + auto Index = loadCombinedIndex(); + auto InputBuffers = loadAllFilesForIndex(*Index); + for (auto &MemBuffer : InputBuffers) + ThinGenerator.addModule(MemBuffer->getBufferIdentifier(), + MemBuffer->getBuffer()); + + for (auto &Filename : InputFilenames) { + LLVMContext Ctx; + auto TheModule = loadModule(Filename, Ctx); + + ThinGenerator.internalize(*TheModule, *Index); + + std::string OutputName = OutputFilename; + if (OutputName.empty()) { + OutputName = Filename + ".thinlto.internalized.bc"; + } + writeModuleToFile(*TheModule, OutputName); + } + } + void optimize() { if (InputFilenames.size() != 1 && !OutputFilename.empty()) report_fatal_error("Can't handle a single output filename and multiple "