Index: include/llvm/LTO/ThinLTOCodeGenerator.h =================================================================== --- include/llvm/LTO/ThinLTOCodeGenerator.h +++ include/llvm/LTO/ThinLTOCodeGenerator.h @@ -201,6 +201,11 @@ void crossModuleImport(Module &Module, ModuleSummaryIndex &Index); /** + * Perform internalization. + */ + void internalize(Module &Module, ModuleSummaryIndex &Index); + + /** * Perform post-importing ThinLTO optimizations. */ void optimize(Module &Module); Index: lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- lib/LTO/ThinLTOCodeGenerator.cpp +++ lib/LTO/ThinLTOCodeGenerator.cpp @@ -17,6 +17,8 @@ #ifdef HAVE_LLVM_REVISION #include "LLVMLTORevision.h" #endif + +#include "UpdateCompilerUsed.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" @@ -31,6 +33,13 @@ #include "llvm/IR/Mangler.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/Debug.h" @@ -38,12 +47,14 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Path.h" #include "llvm/Support/SHA1.h" +#include "llvm/Object/RecordStreamer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionImport.h" +#include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" @@ -309,6 +320,143 
@@ PM.run(TheModule); } +// Parse inline ASM and collect the list of symbols that are not defined in +// the current module. This is inspired by IRObjectFile. +static void CollectAsmUndefinedRefs(Module &TheModule, + StringSet<> &AsmUndefinedRefs) { + const std::string &InlineAsm = TheModule.getModuleInlineAsm(); + if (InlineAsm.empty()) + return; + + Triple TT(TheModule.getTargetTriple()); + std::string Err; + const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); + if (!T) + return; + + std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str())); + if (!MRI) + return; + + std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str())); + if (!MAI) + return; + + std::unique_ptr<MCSubtargetInfo> STI( + T->createMCSubtargetInfo(TT.str(), "", "")); + if (!STI) + return; + + std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo()); + if (!MCII) + return; + + MCObjectFileInfo MOFI; + MCContext MCCtx(MAI.get(), MRI.get(), &MOFI); + MOFI.InitMCObjectFileInfo(TT, Reloc::Default, CodeModel::Default, MCCtx); + std::unique_ptr<RecordStreamer> Streamer(new RecordStreamer(MCCtx)); + T->createNullTargetStreamer(*Streamer); + + std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm)); + SourceMgr SrcMgr; + SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); + std::unique_ptr<MCAsmParser> Parser( + createMCAsmParser(SrcMgr, MCCtx, *Streamer, *MAI)); + + MCTargetOptions MCOptions; + std::unique_ptr<MCTargetAsmParser> TAP( + T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions)); + if (!TAP) + return; + + Parser->setTargetParser(*TAP); + if (Parser->Run(false)) + return; + + for (auto &KV : *Streamer) { + StringRef Key = KV.first(); + RecordStreamer::State Value = KV.second; + switch (Value) { + case RecordStreamer::NeverSeen: + llvm_unreachable("InlineASM error"); + case RecordStreamer::DefinedGlobal: + case RecordStreamer::Defined: + break; + case RecordStreamer::Global: + case RecordStreamer::Used: + if (!TheModule.getNamedValue(Key)) + AsmUndefinedRefs.insert(Key); + break; + } + } +} + +// Compute the GlobalValues of \p TheModule to preserve during internalization. +static DenseSet<const GlobalValue *> computePreservedSymbolsForModule( + Module 
&TheModule, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, + const FunctionImporter::ExportSetTy &ExportList) { + DenseSet<const GlobalValue *> PreservedGV; + if (GUIDPreservedSymbols.empty()) + // Be friendly and don't totally nuke the module when the client didn't + // supply anything to preserve. + return PreservedGV; + + // Record every GlobalValue whose GUID is in the client-supplied preserved + // set \p GUIDPreservedSymbols or in \p ExportList. + auto AddPreserveGV = [&](const GlobalValue &GV) { + auto GUID = GV.getGUID(); + if (GUIDPreservedSymbols.count(GUID) || ExportList.count(GUID)) + PreservedGV.insert(&GV); + }; + + for (auto &GV : TheModule) + AddPreserveGV(GV); + for (auto &GV : TheModule.globals()) + AddPreserveGV(GV); + for (auto &GV : TheModule.aliases()) + AddPreserveGV(GV); + + return PreservedGV; +} + +// Run internalization on \p TheModule +static void +doInternalizeModule(Module &TheModule, const TargetMachine &TM, + const DenseSet<const GlobalValue *> &PreservedGV) { + if (PreservedGV.empty()) { + // Nothing to preserve: leave the module untouched rather than internalizing + // everything. Also taken when no preserved symbol lives in this module. + return; + } + StringSet<> AsmUndefinedRefs; + CollectAsmUndefinedRefs(TheModule, AsmUndefinedRefs); + + // Update the llvm.compiler_used globals to force preserving libcalls and + // symbols referenced from asm + UpdateCompilerUsed(TheModule, TM, AsmUndefinedRefs); + + // Declare a callback for the internalize pass that will ask for every + // candidate GlobalValue if it can be internalized or not. + auto MustPreserveGV = + [&](const GlobalValue &GV) -> bool { return PreservedGV.count(&GV); }; + + llvm::internalizeModule(TheModule, MustPreserveGV); +} + +// Convert the PreservedSymbols set from "Name" based to "GUID" based. 
+static DenseSet<GlobalValue::GUID> +computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols, + const Triple &TheTriple) { + DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size()); + for (auto &Entry : PreservedSymbols) { + StringRef Name = Entry.first(); + if (TheTriple.isOSBinFormatMachO() && !Name.empty() && Name[0] == '_') + Name = Name.drop_front(); + GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name)); + } + return GUIDPreservedSymbols; +} + std::unique_ptr codegenModule(Module &TheModule, TargetMachine &TM) { SmallVector OutputBuffer; @@ -395,6 +543,9 @@ sys::path::append(EntryPath, CachePath, toHex(Hasher.result())); } + // Access the path to this entry in the cache. + StringRef getEntryPath() { return EntryPath; } + // Try loading the buffer for this cache entry. ErrorOr> tryLoadingBuffer() { if (EntryPath.empty()) @@ -429,6 +580,8 @@ Module &TheModule, const ModuleSummaryIndex &Index, StringMap &ModuleMap, TargetMachine &TM, const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, std::map &ResolvedODR, ThinLTOCodeGenerator::CachingOptions CacheOptions, bool DisableCodeGen, StringRef SaveTempsDir, unsigned count) { @@ -436,6 +589,13 @@ // Save temps: after IPO. saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc"); + // Prepare for internalization by computing the set of symbols to preserve. + // We need to compute the list of symbols to preserve during internalization + // before doing any promotion because after renaming we won't (easily) match + // to the original name. + auto PreservedGV = computePreservedSymbolsForModule( + TheModule, GUIDPreservedSymbols, ExportList); + // "Benchmark"-like optimization: single-source case bool SingleModule = (ModuleMap.size() == 1); @@ -449,16 +609,24 @@ // Save temps: after promotion. 
saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc"); + } + + // Internalization + doInternalizeModule(TheModule, TM, PreservedGV); + // Save internalized bitcode + saveTempBitcode(TheModule, SaveTempsDir, count, ".3.internalized.bc"); + + if (!SingleModule) { crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); // Save temps: after cross-module import. - saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); + saveTempBitcode(TheModule, SaveTempsDir, count, ".4.imported.bc"); } optimizeModule(TheModule, TM); - saveTempBitcode(TheModule, SaveTempsDir, count, ".3.opt.bc"); + saveTempBitcode(TheModule, SaveTempsDir, count, ".5.opt.bc"); if (DisableCodeGen) { // Configured to stop before CodeGen, serialize the bitcode and return. @@ -516,7 +684,10 @@ } void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) { - CrossReferencedSymbols.insert(Name); + // At the moment, we don't take advantage of this extra information, we're + // conservatively considering cross-references as preserved. + // CrossReferencedSymbols.insert(Name); + PreservedSymbols.insert(Name); } // TargetMachine factory @@ -620,10 +791,43 @@ } /** + * Perform internalization. + */ +void ThinLTOCodeGenerator::internalize(Module &TheModule, + ModuleSummaryIndex &Index) { + initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); + auto ModuleCount = Index.modulePaths().size(); + auto ModuleIdentifier = TheModule.getModuleIdentifier(); + + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = + computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); + + // Collect for each module the list of functions it defines (GUID -> Summary). 
+ StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>> + ModuleToDefinedGVSummaries(ModuleCount); + Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + + // Generate import/export list + StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); + StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); + ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, + ExportLists); + auto &ExportList = ExportLists[ModuleIdentifier]; + + // Internalization + auto PreservedGV = computePreservedSymbolsForModule( + TheModule, GUIDPreservedSymbols, ExportList); + doInternalizeModule(TheModule, *TMBuilder.create(), PreservedGV); +} + +/** * Perform post-importing ThinLTO optimizations. */ void ThinLTOCodeGenerator::optimize(Module &TheModule) { initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); + + // Optimize now optimizeModule(TheModule, *TMBuilder.create()); } @@ -694,10 +898,9 @@ ExportLists); // Convert the preserved symbols set from string to GUID, this is needed for - // computing the caching. - DenseSet GUIDPreservedSymbols(PreservedSymbols.size()); - for (auto &Entry : PreservedSymbols) - GUIDPreservedSymbols.insert(GlobalValue::getGUID(Entry.first())); + // computing the caching hash and the internalization. + auto GUIDPreservedSymbols = + computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); // Parallel optimizer + codegen { @@ -714,18 +917,21 @@ // We use a std::map here to be able to have a defined ordering when // producing a hash for the cache entry. std::map ResolvedODR; - ResolveODR(*Index, ExportList, DefinedFunctions, - ModuleIdentifier, ResolvedODR); + ResolveODR(*Index, ExportList, DefinedFunctions, ModuleIdentifier, + ResolvedODR); // The module may be cached, this helps handling it. 
- ModuleCacheEntry CacheEntry( - CacheOptions.Path, *Index, ModuleBuffer.getBufferIdentifier(), - ImportLists[ModuleBuffer.getBufferIdentifier()], - ExportLists[ModuleBuffer.getBufferIdentifier()], ResolvedODR, - DefinedFunctions, GUIDPreservedSymbols); + ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier, + ImportLists[ModuleIdentifier], ExportList, + ResolvedODR, DefinedFunctions, + GUIDPreservedSymbols); { auto ErrOrBuffer = CacheEntry.tryLoadingBuffer(); + DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '" + << CacheEntry.getEntryPath() << "' for buffer " << count + << " " << ModuleIdentifier << "\n"); + if (ErrOrBuffer) { // Cache Hit! ProducedBinaries[count] = std::move(ErrOrBuffer.get()); @@ -741,14 +947,14 @@ auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false); // Save temps: original file. - if (!SaveTempsDir.empty()) { - saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); - } + saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); auto &ImportList = ImportLists[ModuleIdentifier]; + // Run the main process now, and generate a binary auto OutputBuffer = ProcessThinLTOModule( *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList, - ResolvedODR, CacheOptions, DisableCodeGen, SaveTempsDir, count); + ExportList, GUIDPreservedSymbols, ResolvedODR, CacheOptions, + DisableCodeGen, SaveTempsDir, count); CacheEntry.write(*OutputBuffer); ProducedBinaries[count] = std::move(OutputBuffer); Index: lib/Object/IRObjectFile.cpp =================================================================== --- lib/Object/IRObjectFile.cpp +++ lib/Object/IRObjectFile.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/IRObjectFile.h" -#include "RecordStreamer.h" +#include "llvm/Object/RecordStreamer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/GVMaterializer.h" Index: 
lib/Object/RecordStreamer.h =================================================================== --- /dev/null +++ lib/Object/RecordStreamer.h @@ -1,42 +0,0 @@ -//===-- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_OBJECT_RECORDSTREAMER_H -#define LLVM_LIB_OBJECT_RECORDSTREAMER_H - -#include "llvm/MC/MCStreamer.h" - -namespace llvm { -class RecordStreamer : public MCStreamer { -public: - enum State { NeverSeen, Global, Defined, DefinedGlobal, Used }; - -private: - StringMap Symbols; - void markDefined(const MCSymbol &Symbol); - void markGlobal(const MCSymbol &Symbol); - void markUsed(const MCSymbol &Symbol); - void visitUsedSymbol(const MCSymbol &Sym) override; - -public: - typedef StringMap::const_iterator const_iterator; - const_iterator begin(); - const_iterator end(); - RecordStreamer(MCContext &Context); - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; - void EmitLabel(MCSymbol *Symbol) override; - void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; - bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; - void EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) override; - void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) override; -}; -} -#endif Index: lib/Object/RecordStreamer.cpp =================================================================== --- lib/Object/RecordStreamer.cpp +++ lib/Object/RecordStreamer.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "RecordStreamer.h" +#include "llvm/Object/RecordStreamer.h" #include "llvm/MC/MCSymbol.h" using namespace 
llvm; Index: test/ThinLTO/X86/internalize.ll =================================================================== --- /dev/null +++ test/ThinLTO/X86/internalize.ll @@ -0,0 +1,19 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: llvm-lto -thinlto-action=thinlink -o %t.index.bc %t1.bc +; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=REGULAR +; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - --exported-symbol=foo | llvm-dis -o - | FileCheck %s --check-prefix=INTERNALIZE + +; REGULAR: define void @foo +; REGULAR: define void @bar +; INTERNALIZE: define void @foo +; INTERNALIZE: define internal void @bar + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +define void @foo() { + ret void +} +define void @bar() { + ret void +} Index: tools/llvm-lto/llvm-lto.cpp =================================================================== --- tools/llvm-lto/llvm-lto.cpp +++ tools/llvm-lto/llvm-lto.cpp @@ -68,6 +68,7 @@ THINLINK, THINPROMOTE, THINIMPORT, + THININTERNALIZE, THINOPT, THINCODEGEN, THINALL @@ -84,6 +85,9 @@ clEnumValN(THINIMPORT, "import", "Perform both promotion and " "cross-module importing (requires " "-thinlto-index)."), + clEnumValN(THININTERNALIZE, "internalize", + "Perform internalization driven by -exported-symbol " + "(requires -thinlto-index)."), clEnumValN(THINOPT, "optimize", "Perform ThinLTO optimizations."), clEnumValN(THINCODEGEN, "codegen", "CodeGen (expected to match llc)"), clEnumValN(THINALL, "run", "Perform ThinLTO end-to-end"), @@ -105,10 +109,10 @@ cl::desc("Override output filename"), cl::value_desc("filename")); -static cl::list - ExportedSymbols("exported-symbol", - cl::desc("Symbol to export from the resulting object file"), - cl::ZeroOrMore); +static cl::list<std::string> ExportedSymbols( + "exported-symbol", + cl::desc("List of symbols to export from the 
resulting object file"), + cl::ZeroOrMore); static cl::list DSOSymbols("dso-symbol", @@ -329,6 +333,10 @@ ThinLTOProcessing(const TargetOptions &Options) { ThinGenerator.setCodePICModel(RelocModel); ThinGenerator.setTargetOptions(Options); + + // Add all the exported symbols to the table of symbols to preserve. + for (const auto &Symbol : ExportedSymbols) + ThinGenerator.preserveSymbol(Symbol); } void run() { @@ -339,6 +347,8 @@ return promote(); case THINIMPORT: return import(); + case THININTERNALIZE: + return internalize(); case THINOPT: return optimize(); case THINCODEGEN: @@ -432,6 +442,37 @@ } } + void internalize() { + if (InputFilenames.size() != 1 && !OutputFilename.empty()) + report_fatal_error("Can't handle a single output filename and multiple " + "input files, do not provide an output filename and " + "the output files will be suffixed from the input " + "ones."); + + if (ExportedSymbols.empty()) + errs() << "Warning: -internalize will not perform without " + "-exported-symbol\n"; + + auto Index = loadCombinedIndex(); + auto InputBuffers = loadAllFilesForIndex(*Index); + for (auto &MemBuffer : InputBuffers) + ThinGenerator.addModule(MemBuffer->getBufferIdentifier(), + MemBuffer->getBuffer()); + + for (auto &Filename : InputFilenames) { + LLVMContext Ctx; + auto TheModule = loadModule(Filename, Ctx); + + ThinGenerator.internalize(*TheModule, *Index); + + std::string OutputName = OutputFilename; + if (OutputName.empty()) { + OutputName = Filename + ".thinlto.internalized.bc"; + } + writeModuleToFile(*TheModule, OutputName); + } + } + void optimize() { if (InputFilenames.size() != 1 && !OutputFilename.empty()) report_fatal_error("Can't handle a single output filename and multiple "