Index: lld/trunk/COFF/InputFiles.cpp =================================================================== --- lld/trunk/COFF/InputFiles.cpp +++ lld/trunk/COFF/InputFiles.cpp @@ -355,13 +355,12 @@ SymbolBody *Alias = Symtab->addUndefined(Saver.save(Fallback)); checkAndSetWeakAlias(Symtab, this, Sym->body(), Alias); } else { - Expected ComdatIndex = ObjSym.getComdatIndex(); - bool IsCOMDAT = ComdatIndex && *ComdatIndex != -1; + bool IsCOMDAT = ObjSym.getComdatIndex() != -1; Sym = Symtab->addRegular(this, SymName, IsCOMDAT); } SymbolBodies.push_back(Sym->body()); } - Directives = check(Obj->getLinkerOpts()); + Directives = Obj->getCOFFLinkerOpts(); } MachineTypes BitcodeFile::getMachineType() { Index: lld/trunk/ELF/InputFiles.cpp =================================================================== --- lld/trunk/ELF/InputFiles.cpp +++ lld/trunk/ELF/InputFiles.cpp @@ -818,7 +818,7 @@ uint8_t Visibility = mapVisibility(ObjSym.getVisibility()); bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable(); - int C = check(ObjSym.getComdatIndex(), F->LogName); + int C = ObjSym.getComdatIndex(); if (C != -1 && !KeptComdats[C]) return Symtab::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, Visibility, Type, CanOmitFromDynSym, @@ -855,10 +855,8 @@ Obj = check(lto::InputFile::create(MBRef), this->LogName); std::vector KeptComdats; - for (StringRef S : Obj->getComdatTable()) { - StringRef N = Saver.save(S); - KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(N)).second); - } + for (StringRef S : Obj->getComdatTable()) + KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(S)).second); for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) Symbols.push_back(createBitcodeSymbol(KeptComdats, ObjSym, this)); Index: llvm/trunk/include/llvm/LTO/LTO.h =================================================================== --- llvm/trunk/include/llvm/LTO/LTO.h +++ llvm/trunk/include/llvm/LTO/LTO.h @@ -24,7 +24,7 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/LTO/Config.h" #include "llvm/Linker/IRMover.h" -#include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Object/IRSymtab.h" #include "llvm/Support/Error.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/thread.h" @@ -79,21 +79,26 @@ struct SymbolResolution; class ThinBackendProc; -/// An input file. This is a wrapper for ModuleSymbolTable that exposes only the +/// An input file. This is a symbol table wrapper that only exposes the /// information that an LTO client should need in order to do symbol resolution. class InputFile { +public: + class Symbol; + +private: // FIXME: Remove LTO class friendship once we have bitcode symbol tables. friend LTO; InputFile() = default; - // FIXME: Remove the LLVMContext once we have bitcode symbol tables. - LLVMContext Ctx; - struct InputModule; - std::vector Mods; - ModuleSymbolTable SymTab; + std::vector Mods; + SmallVector Strtab; + std::vector Symbols; + + // [begin, end) for each module + std::vector> ModuleSymIndices; - std::vector Comdats; - DenseMap ComdatMap; + StringRef SourceFileName, COFFLinkerOpts; + std::vector ComdatTable; public: ~InputFile(); @@ -101,170 +106,48 @@ /// Create an InputFile. static Expected> create(MemoryBufferRef Object); - class symbol_iterator; - - /// This is a wrapper for ArrayRef::iterator that - /// exposes only the information that an LTO client should need in order to do - /// symbol resolution. - /// - /// This object is ephemeral; it is only valid as long as an iterator obtained - /// from symbols() refers to it. - class Symbol { - friend symbol_iterator; + /// The purpose of this class is to only expose the symbol information that an + /// LTO client should need in order to do symbol resolution. + class Symbol : irsymtab::Symbol { friend LTO; - ArrayRef::iterator I; - const ModuleSymbolTable &SymTab; - const InputFile *File; - uint32_t Flags; - SmallString<64> Name; - - bool shouldSkip() { - return !(Flags & object::BasicSymbolRef::SF_Global) || - (Flags & object::BasicSymbolRef::SF_FormatSpecific); - } - - void skip() { - ArrayRef::iterator E = SymTab.symbols().end(); - while (I != E) { - Flags = SymTab.getSymbolFlags(*I); - if (!shouldSkip()) - break; - ++I; - } - if (I == E) - return; - - Name.clear(); - { - raw_svector_ostream OS(Name); - SymTab.printSymbolName(OS, *I); - } - } - - bool isGV() const { return I->is(); } - GlobalValue *getGV() const { return I->get(); } - public: - Symbol(ArrayRef::iterator I, - const ModuleSymbolTable &SymTab, const InputFile *File) - : I(I), SymTab(SymTab), File(File) { - skip(); - } - - bool isUndefined() const { - return Flags & object::BasicSymbolRef::SF_Undefined; - } - bool isCommon() const { return Flags & object::BasicSymbolRef::SF_Common; } - bool isWeak() const { return Flags & object::BasicSymbolRef::SF_Weak; } - bool isIndirect() const { - return Flags & object::BasicSymbolRef::SF_Indirect; - } - - /// For COFF weak externals, returns the name of the symbol that is used - /// as a fallback if the weak external remains undefined. - std::string getCOFFWeakExternalFallback() const { - assert((Flags & object::BasicSymbolRef::SF_Weak) && - (Flags & object::BasicSymbolRef::SF_Indirect) && - "symbol is not a weak external"); - std::string Name; - raw_string_ostream OS(Name); - SymTab.printSymbolName( - OS, - cast( - cast(getGV())->getAliasee()->stripPointerCasts())); - OS.flush(); - return Name; - } - - /// Returns the mangled name of the global. - StringRef getName() const { return Name; } - - GlobalValue::VisibilityTypes getVisibility() const { - if (isGV()) - return getGV()->getVisibility(); - return GlobalValue::DefaultVisibility; - } - bool canBeOmittedFromSymbolTable() const { - return isGV() && llvm::canBeOmittedFromSymbolTable(getGV()); - } - bool isTLS() const { - // FIXME: Expose a thread-local flag for module asm symbols. - return isGV() && getGV()->isThreadLocal(); - } - - // Returns the index of the comdat this symbol is in or -1 if the symbol - // is not in a comdat. - // FIXME: We have to return Expected because aliases point to an - // arbitrary ConstantExpr and that might not actually be a constant. That - // means we might not be able to find what an alias is aliased to and - // so find its comdat. - Expected getComdatIndex() const; - - uint64_t getCommonSize() const { - assert(Flags & object::BasicSymbolRef::SF_Common); - if (!isGV()) - return 0; - return getGV()->getParent()->getDataLayout().getTypeAllocSize( - getGV()->getType()->getElementType()); - } - unsigned getCommonAlignment() const { - assert(Flags & object::BasicSymbolRef::SF_Common); - if (!isGV()) - return 0; - return getGV()->getAlignment(); - } - }; - - class symbol_iterator { - Symbol Sym; + Symbol(const irsymtab::Symbol &S) : irsymtab::Symbol(S) {} - public: - symbol_iterator(ArrayRef::iterator I, - const ModuleSymbolTable &SymTab, const InputFile *File) - : Sym(I, SymTab, File) {} - - symbol_iterator &operator++() { - ++Sym.I; - Sym.skip(); - return *this; - } - - symbol_iterator operator++(int) { - symbol_iterator I = *this; - ++*this; - return I; - } - - const Symbol &operator*() const { return Sym; } - const Symbol *operator->() const { return &Sym; } - - bool operator!=(const symbol_iterator &Other) const { - return Sym.I != Other.Sym.I; - } + using irsymtab::Symbol::isUndefined; + using irsymtab::Symbol::isCommon; + using irsymtab::Symbol::isWeak; + using irsymtab::Symbol::isIndirect; + using irsymtab::Symbol::getName; + using irsymtab::Symbol::getVisibility; + using irsymtab::Symbol::canBeOmittedFromSymbolTable; + using irsymtab::Symbol::isTLS; + using irsymtab::Symbol::getComdatIndex; + using irsymtab::Symbol::getCommonSize; + using irsymtab::Symbol::getCommonAlignment; + using irsymtab::Symbol::getCOFFWeakExternalFallback; }; /// A range over the symbols in this InputFile. - iterator_range symbols() { - return llvm::make_range( - symbol_iterator(SymTab.symbols().begin(), SymTab, this), - symbol_iterator(SymTab.symbols().end(), SymTab, this)); - } + ArrayRef symbols() const { return Symbols; } /// Returns linker options specified in the input file. - Expected getLinkerOpts(); + StringRef getCOFFLinkerOpts() const { return COFFLinkerOpts; } /// Returns the path to the InputFile. StringRef getName() const; /// Returns the source file path specified at compile time. - StringRef getSourceFileName() const; + StringRef getSourceFileName() const { return SourceFileName; } // Returns a table with all the comdats used by this file. - ArrayRef getComdatTable() const { return Comdats; } + ArrayRef getComdatTable() const { return ComdatTable; } private: - iterator_range module_symbols(InputModule &IM); + ArrayRef module_symbols(unsigned I) const { + const auto &Indices = ModuleSymIndices[I]; + return {Symbols.data() + Indices.first, Symbols.data() + Indices.second}; + } }; /// This class wraps an output stream for a native object. Most clients should @@ -452,20 +335,20 @@ // Global mapping from mangled symbol names to resolutions. StringMap GlobalResolutions; - void addSymbolToGlobalRes(SmallPtrSet &Used, - const InputFile::Symbol &Sym, SymbolResolution Res, + void addSymbolToGlobalRes(const InputFile::Symbol &Sym, SymbolResolution Res, unsigned Partition); // These functions take a range of symbol resolutions [ResI, ResE) and consume // the resolutions used by a single input module by incrementing ResI. After // these functions return, [ResI, ResE) will refer to the resolution range for // the remaining modules in the InputFile. - Error addModule(InputFile &Input, InputFile::InputModule &IM, + Error addModule(InputFile &Input, unsigned ModI, const SymbolResolution *&ResI, const SymbolResolution *ResE); - Error addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI, + Error addRegularLTO(BitcodeModule BM, + ArrayRef Syms, + const SymbolResolution *&ResI, const SymbolResolution *ResE); - Error addThinLTO(BitcodeModule BM, Module &M, - iterator_range Syms, + Error addThinLTO(BitcodeModule BM, ArrayRef Syms, const SymbolResolution *&ResI, const SymbolResolution *ResE); Error runRegularLTO(AddStreamFn AddStream); Index: llvm/trunk/include/llvm/Object/IRSymtab.h =================================================================== --- llvm/trunk/include/llvm/Object/IRSymtab.h +++ llvm/trunk/include/llvm/Object/IRSymtab.h @@ -0,0 +1,298 @@ +//===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains data definitions and a reader and builder for a symbol +// table for LLVM IR. Its purpose is to allow linkers and other consumers of +// bitcode files to efficiently read the symbol table for symbol resolution +// purposes without needing to construct a module in memory. +// +// As with most object files the symbol table has two parts: the symbol table +// itself and a string table which is referenced by the symbol table. +// +// A symbol table corresponds to a single bitcode file, which may consist of +// multiple modules, so symbol tables may likewise contain symbols for multiple +// modules. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_IRSYMTAB_H +#define LLVM_OBJECT_IRSYMTAB_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Endian.h" + +namespace llvm { +namespace irsymtab { +namespace storage { + +// The data structures in this namespace define the low-level serialization +// format. Clients that just want to read a symbol table should use the +// irsymtab::Reader class. + +typedef support::ulittle32_t Word; + +/// A reference to a string in the string table. +struct Str { + Word Offset; + StringRef get(StringRef Strtab) const { + return Strtab.data() + Offset; + } +}; + +/// A reference to a range of objects in the symbol table. +template struct Range { + Word Offset, Size; + ArrayRef get(StringRef Symtab) const { + return {reinterpret_cast(Symtab.data() + Offset), Size}; + } +}; + +/// Describes the range of a particular module's symbols within the symbol +/// table. +struct Module { + Word Begin, End; +}; + +/// This is equivalent to an IR comdat. +struct Comdat { + Str Name; +}; + +/// Contains the information needed by linkers for symbol resolution, as well as +/// by the LTO implementation itself. +struct Symbol { + /// The mangled symbol name. + Str Name; + + /// The unmangled symbol name, or the empty string if this is not an IR + /// symbol. + Str IRName; + + /// The index into Header::Comdats, or -1 if not a comdat member. + Word ComdatIndex; + + Word Flags; + enum FlagBits { + FB_visibility, // 2 bits + FB_undefined = FB_visibility + 2, + FB_weak, + FB_common, + FB_indirect, + FB_used, + FB_tls, + FB_may_omit, + FB_global, + FB_format_specific, + FB_unnamed_addr, + }; + + /// The index into the Uncommon table, or -1 if this symbol does not have an + /// Uncommon. + Word UncommonIndex; +}; + +/// This data structure contains rarely used symbol fields and is optionally +/// referenced by a Symbol. +struct Uncommon { + Word CommonSize, CommonAlign; + + /// COFF-specific: the name of the symbol that a weak external resolves to + /// if not defined. + Str COFFWeakExternFallbackName; +}; + +struct Header { + Range Modules; + Range Comdats; + Range Symbols; + Range Uncommons; + + Str SourceFileName; + + /// COFF-specific: linker directives. + Str COFFLinkerOpts; +}; + +} + +/// Fills in Symtab and Strtab with a valid symbol and string table for Mods. +Error build(ArrayRef Mods, SmallVector &Symtab, + SmallVector &Strtab); + +/// This represents a symbol that has been read from a storage::Symbol and +/// possibly a storage::Uncommon. +struct Symbol { + // Copied from storage::Symbol. + StringRef Name, IRName; + int ComdatIndex; + uint32_t Flags; + + // Copied from storage::Uncommon. + uint32_t CommonSize, CommonAlign; + StringRef COFFWeakExternFallbackName; + + /// Returns the mangled symbol name. + StringRef getName() const { return Name; } + + /// Returns the unmangled symbol name, or the empty string if this is not an + /// IR symbol. + StringRef getIRName() const { return IRName; } + + /// Returns the index into the comdat table (see Reader::getComdatTable()), or + /// -1 if not a comdat member. + int getComdatIndex() const { return ComdatIndex; } + + using S = storage::Symbol; + GlobalValue::VisibilityTypes getVisibility() const { + return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3); + } + bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; } + bool isWeak() const { return (Flags >> S::FB_weak) & 1; } + bool isCommon() const { return (Flags >> S::FB_common) & 1; } + bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; } + bool isUsed() const { return (Flags >> S::FB_used) & 1; } + bool isTLS() const { return (Flags >> S::FB_tls) & 1; } + bool canBeOmittedFromSymbolTable() const { + return (Flags >> S::FB_may_omit) & 1; + } + bool isGlobal() const { return (Flags >> S::FB_global) & 1; } + bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; } + bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; } + + size_t getCommonSize() const { + assert(isCommon()); + return CommonSize; + } + uint32_t getCommonAlignment() const { + assert(isCommon()); + return CommonAlign; + } + + /// COFF-specific: for weak externals, returns the name of the symbol that is + /// used as a fallback if the weak external remains undefined. + StringRef getCOFFWeakExternalFallback() const { + assert(isWeak() && isIndirect()); + return COFFWeakExternFallbackName; + } +}; + +/// This class can be used to read a Symtab and Strtab produced by +/// irsymtab::build. +class Reader { + StringRef Symtab, Strtab; + + ArrayRef Modules; + ArrayRef Comdats; + ArrayRef Symbols; + ArrayRef Uncommons; + + StringRef str(storage::Str S) const { return S.get(Strtab); } + template ArrayRef range(storage::Range R) const { + return R.get(Symtab); + } + const storage::Header &header() const { + return *reinterpret_cast(Symtab.data()); + } + +public: + class SymbolRef; + + Reader() = default; + Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) { + Modules = range(header().Modules); + Comdats = range(header().Comdats); + Symbols = range(header().Symbols); + Uncommons = range(header().Uncommons); + } + + typedef iterator_range> symbol_range; + + /// Returns the symbol table for the entire bitcode file. + /// The symbols enumerated by this method are ephemeral, but they can be + /// copied into an irsymtab::Symbol object. + symbol_range symbols() const; + + /// Returns a slice of the symbol table for the I'th module in the file. + /// The symbols enumerated by this method are ephemeral, but they can be + /// copied into an irsymtab::Symbol object. + symbol_range module_symbols(unsigned I) const; + + /// Returns the source file path specified at compile time. + StringRef getSourceFileName() const { return str(header().SourceFileName); } + + /// Returns a table with all the comdats used by this file. + std::vector getComdatTable() const { + std::vector ComdatTable; + ComdatTable.reserve(Comdats.size()); + for (auto C : Comdats) + ComdatTable.push_back(str(C.Name)); + return ComdatTable; + } + + /// COFF-specific: returns linker options specified in the input file. + StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); } +}; + +/// Ephemeral symbols produced by Reader::symbols() and +/// Reader::module_symbols(). +class Reader::SymbolRef : public Symbol { + const storage::Symbol *SymI, *SymE; + const Reader *R; + +public: + SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE, + const Reader *R) + : SymI(SymI), SymE(SymE), R(R) { + read(); + } + + void read() { + if (SymI == SymE) + return; + + Name = R->str(SymI->Name); + IRName = R->str(SymI->IRName); + ComdatIndex = SymI->ComdatIndex; + Flags = SymI->Flags; + + uint32_t UncI = SymI->UncommonIndex; + if (UncI != -1u) { + const storage::Uncommon &Unc = R->Uncommons[UncI]; + CommonSize = Unc.CommonSize; + CommonAlign = Unc.CommonAlign; + COFFWeakExternFallbackName = R->str(Unc.COFFWeakExternFallbackName); + } + } + void moveNext() { + ++SymI; + read(); + } + + bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; } +}; + +inline Reader::symbol_range Reader::symbols() const { + return {SymbolRef(Symbols.begin(), Symbols.end(), this), + SymbolRef(Symbols.end(), Symbols.end(), this)}; +} + +inline Reader::symbol_range Reader::module_symbols(unsigned I) const { + const storage::Module &M = Modules[I]; + const storage::Symbol *MBegin = Symbols.begin() + M.Begin, + *MEnd = Symbols.begin() + M.End; + return {SymbolRef(MBegin, MEnd, this), SymbolRef(MEnd, MEnd, this)}; +} + +} + +} + +#endif Index: llvm/trunk/lib/LTO/LTO.cpp =================================================================== --- llvm/trunk/lib/LTO/LTO.cpp +++ llvm/trunk/lib/LTO/LTO.cpp @@ -305,14 +305,6 @@ thinLTOInternalizeAndPromoteGUID(I.second, I.first, isExported); } -struct InputFile::InputModule { - BitcodeModule BM; - std::unique_ptr Mod; - - // The range of ModuleSymbolTable entries for this input module. - size_t SymBegin, SymEnd; -}; - // Requires a destructor for std::vector. InputFile::~InputFile() = default; @@ -333,87 +325,51 @@ return make_error("Bitcode file does not contain any modules", inconvertibleErrorCode()); - // Create an InputModule for each module in the InputFile, and add it to the - // ModuleSymbolTable. + File->Mods = *BMsOrErr; + + LLVMContext Ctx; + std::vector Mods; + std::vector> OwnedMods; for (auto BM : *BMsOrErr) { Expected> MOrErr = - BM.getLazyModule(File->Ctx, /*ShouldLazyLoadMetadata*/ true, + BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true, /*IsImporting*/ false); if (!MOrErr) return MOrErr.takeError(); - size_t SymBegin = File->SymTab.symbols().size(); - File->SymTab.addModule(MOrErr->get()); - size_t SymEnd = File->SymTab.symbols().size(); - - for (const auto &C : (*MOrErr)->getComdatSymbolTable()) { - auto P = File->ComdatMap.insert( - std::make_pair(&C.second, File->Comdats.size())); - assert(P.second); - (void)P; - File->Comdats.push_back(C.first()); - } - - File->Mods.push_back({BM, std::move(*MOrErr), SymBegin, SymEnd}); + if ((*MOrErr)->getDataLayoutStr().empty()) + return make_error("input module has no datalayout", + inconvertibleErrorCode()); + + Mods.push_back(MOrErr->get()); + OwnedMods.push_back(std::move(*MOrErr)); + } + + SmallVector Symtab; + if (Error E = irsymtab::build(Mods, Symtab, File->Strtab)) + return std::move(E); + + irsymtab::Reader R({Symtab.data(), Symtab.size()}, + {File->Strtab.data(), File->Strtab.size()}); + File->SourceFileName = R.getSourceFileName(); + File->COFFLinkerOpts = R.getCOFFLinkerOpts(); + File->ComdatTable = R.getComdatTable(); + + for (unsigned I = 0; I != Mods.size(); ++I) { + size_t Begin = File->Symbols.size(); + for (const irsymtab::Reader::SymbolRef &Sym : R.module_symbols(I)) + // Skip symbols that are irrelevant to LTO. Note that this condition needs + // to match the one in Skip() in LTO::addRegularLTO(). + if (Sym.isGlobal() && !Sym.isFormatSpecific()) + File->Symbols.push_back(Sym); + File->ModuleSymIndices.push_back({Begin, File->Symbols.size()}); } return std::move(File); } -Expected InputFile::Symbol::getComdatIndex() const { - if (!isGV()) - return -1; - const GlobalObject *GO = getGV()->getBaseObject(); - if (!GO) - return make_error("Unable to determine comdat of alias!", - inconvertibleErrorCode()); - if (const Comdat *C = GO->getComdat()) { - auto I = File->ComdatMap.find(C); - assert(I != File->ComdatMap.end()); - return I->second; - } - return -1; -} - -Expected InputFile::getLinkerOpts() { - std::string LinkerOpts; - raw_string_ostream LOS(LinkerOpts); - // Extract linker options from module metadata. - for (InputModule &Mod : Mods) { - std::unique_ptr &M = Mod.Mod; - if (auto E = M->materializeMetadata()) - return std::move(E); - if (Metadata *Val = M->getModuleFlag("Linker Options")) { - MDNode *LinkerOptions = cast(Val); - for (const MDOperand &MDOptions : LinkerOptions->operands()) - for (const MDOperand &MDOption : cast(MDOptions)->operands()) - LOS << " " << cast(MDOption)->getString(); - } - } - - // Synthesize export flags for symbols with dllexport storage. - const Triple TT(Mods[0].Mod->getTargetTriple()); - Mangler M; - for (const ModuleSymbolTable::Symbol &Sym : SymTab.symbols()) - if (auto *GV = Sym.dyn_cast()) - emitLinkerFlagsForGlobalCOFF(LOS, GV, TT, M); - LOS.flush(); - return LinkerOpts; -} - StringRef InputFile::getName() const { - return Mods[0].BM.getModuleIdentifier(); -} - -StringRef InputFile::getSourceFileName() const { - return Mods[0].Mod->getSourceFileName(); -} - -iterator_range -InputFile::module_symbols(InputModule &IM) { - return llvm::make_range( - symbol_iterator(SymTab.symbols().data() + IM.SymBegin, SymTab, this), - symbol_iterator(SymTab.symbols().data() + IM.SymEnd, SymTab, this)); + return Mods[0].getModuleIdentifier(); } LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel, @@ -437,21 +393,17 @@ LTO::~LTO() = default; // Add the given symbol to the GlobalResolutions map, and resolve its partition. -void LTO::addSymbolToGlobalRes(SmallPtrSet &Used, - const InputFile::Symbol &Sym, +void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym, SymbolResolution Res, unsigned Partition) { - GlobalValue *GV = Sym.isGV() ? Sym.getGV() : nullptr; - auto &GlobalRes = GlobalResolutions[Sym.getName()]; - if (GV) { - GlobalRes.UnnamedAddr &= GV->hasGlobalUnnamedAddr(); - if (Res.Prevailing) - GlobalRes.IRName = GV->getName(); - } + GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); + if (Res.Prevailing) + GlobalRes.IRName = Sym.getIRName(); + // Set the partition to external if we know it is used elsewhere, e.g. // it is visible to a regular object, is referenced from llvm.compiler_used, // or was already recorded as being referenced from a different partition. - if (Res.VisibleToRegularObj || (GV && Used.count(GV)) || + if (Res.VisibleToRegularObj || Sym.isUsed() || (GlobalRes.Partition != GlobalResolution::Unknown && GlobalRes.Partition != Partition)) { GlobalRes.Partition = GlobalResolution::External; @@ -495,41 +447,32 @@ writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res); const SymbolResolution *ResI = Res.begin(); - for (InputFile::InputModule &IM : Input->Mods) - if (Error Err = addModule(*Input, IM, ResI, Res.end())) + for (unsigned I = 0; I != Input->Mods.size(); ++I) + if (Error Err = addModule(*Input, I, ResI, Res.end())) return Err; assert(ResI == Res.end()); return Error::success(); } -Error LTO::addModule(InputFile &Input, InputFile::InputModule &IM, +Error LTO::addModule(InputFile &Input, unsigned ModI, const SymbolResolution *&ResI, const SymbolResolution *ResE) { - // FIXME: move to backend - Module &M = *IM.Mod; - - if (M.getDataLayoutStr().empty()) - return make_error("input module has no datalayout", - inconvertibleErrorCode()); - - if (!Conf.OverrideTriple.empty()) - M.setTargetTriple(Conf.OverrideTriple); - else if (M.getTargetTriple().empty()) - M.setTargetTriple(Conf.DefaultTriple); - - Expected HasThinLTOSummary = IM.BM.hasSummary(); + Expected HasThinLTOSummary = Input.Mods[ModI].hasSummary(); if (!HasThinLTOSummary) return HasThinLTOSummary.takeError(); + auto ModSyms = Input.module_symbols(ModI); if (*HasThinLTOSummary) - return addThinLTO(IM.BM, M, Input.module_symbols(IM), ResI, ResE); + return addThinLTO(Input.Mods[ModI], ModSyms, ResI, ResE); else - return addRegularLTO(IM.BM, ResI, ResE); + return addRegularLTO(Input.Mods[ModI], ModSyms, ResI, ResE); } // Add a regular LTO object to the link. -Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI, +Error LTO::addRegularLTO(BitcodeModule BM, + ArrayRef Syms, + const SymbolResolution *&ResI, const SymbolResolution *ResE) { if (!RegularLTO.CombinedModule) { RegularLTO.CombinedModule = @@ -550,9 +493,6 @@ ModuleSymbolTable SymTab; SymTab.addModule(&M); - SmallPtrSet Used; - collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false); - std::vector Keep; for (GlobalVariable &GV : M.globals()) @@ -564,17 +504,35 @@ if (GlobalObject *GO = GA.getBaseObject()) AliasedGlobals.insert(GO); - for (const InputFile::Symbol &Sym : - make_range(InputFile::symbol_iterator(SymTab.symbols().begin(), SymTab, - nullptr), - InputFile::symbol_iterator(SymTab.symbols().end(), SymTab, - nullptr))) { + // In this function we need IR GlobalValues matching the symbols in Syms + // (which is not backed by a module), so we need to enumerate them in the same + // order. The symbol enumeration order of a ModuleSymbolTable intentionally + // matches the order of an irsymtab, but when we read the irsymtab in + // InputFile::create we omit some symbols that are irrelevant to LTO. The + // Skip() function skips the same symbols from the module as InputFile does + // from the symbol table. + auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end(); + auto Skip = [&]() { + while (MsymI != MsymE) { + auto Flags = SymTab.getSymbolFlags(*MsymI); + if ((Flags & object::BasicSymbolRef::SF_Global) && + !(Flags & object::BasicSymbolRef::SF_FormatSpecific)) + return; + ++MsymI; + } + }; + Skip(); + + for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; - addSymbolToGlobalRes(Used, Sym, Res, 0); + addSymbolToGlobalRes(Sym, Res, 0); + + assert(MsymI != MsymE); + ModuleSymbolTable::Symbol Msym = *MsymI++; + Skip(); - if (Sym.isGV()) { - GlobalValue *GV = Sym.getGV(); + if (GlobalValue *GV = Msym.dyn_cast()) { if (Res.Prevailing) { if (Sym.isUndefined()) continue; @@ -612,7 +570,7 @@ if (Sym.isCommon()) { // FIXME: We should figure out what to do about commons defined by asm. // For now they aren't reported correctly by ModuleSymbolTable. - auto &CommonRes = RegularLTO.Commons[Sym.getGV()->getName()]; + auto &CommonRes = RegularLTO.Commons[Sym.getIRName()]; CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize()); CommonRes.Align = std::max(CommonRes.Align, Sym.getCommonAlignment()); CommonRes.Prevailing |= Res.Prevailing; @@ -620,6 +578,7 @@ // FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit. } + assert(MsymI == MsymE); return RegularLTO.Mover->move(std::move(*MOrErr), Keep, [](GlobalValue &, IRMover::ValueAdder) {}, @@ -627,15 +586,10 @@ } // Add a ThinLTO object to the link. -// FIXME: This function should not need to take as many parameters once we have -// a bitcode symbol table. -Error LTO::addThinLTO(BitcodeModule BM, Module &M, - iterator_range Syms, +Error LTO::addThinLTO(BitcodeModule BM, + ArrayRef Syms, const SymbolResolution *&ResI, const SymbolResolution *ResE) { - SmallPtrSet Used; - collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false); - Expected> SummaryOrErr = BM.getSummary(); if (!SummaryOrErr) return SummaryOrErr.takeError(); @@ -645,11 +599,15 @@ for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; - addSymbolToGlobalRes(Used, Sym, Res, ThinLTO.ModuleMap.size() + 1); + addSymbolToGlobalRes(Sym, Res, ThinLTO.ModuleMap.size() + 1); - if (Res.Prevailing && Sym.isGV()) - ThinLTO.PrevailingModuleForGUID[Sym.getGV()->getGUID()] = - BM.getModuleIdentifier(); + if (Res.Prevailing) { + if (!Sym.getIRName().empty()) { + auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier( + Sym.getIRName(), GlobalValue::ExternalLinkage, "")); + ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier(); + } + } } if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second) Index: llvm/trunk/lib/Object/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Object/CMakeLists.txt +++ llvm/trunk/lib/Object/CMakeLists.txt @@ -8,6 +8,7 @@ ELFObjectFile.cpp Error.cpp IRObjectFile.cpp + IRSymtab.cpp MachOObjectFile.cpp MachOUniversal.cpp ModuleSummaryIndexObjectFile.cpp Index: llvm/trunk/lib/Object/IRSymtab.cpp =================================================================== --- llvm/trunk/lib/Object/IRSymtab.cpp +++ llvm/trunk/lib/Object/IRSymtab.cpp @@ -0,0 +1,228 @@ +//===- IRSymtab.cpp - implementation of IR symbol tables --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/IRSymtab.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/StringSaver.h" + +using namespace llvm; +using namespace irsymtab; + +namespace { + +/// Stores the temporary state that is required to build an IR symbol table. +struct Builder { + SmallVector &Symtab; + SmallVector &Strtab; + Builder(SmallVector &Symtab, SmallVector &Strtab) + : Symtab(Symtab), Strtab(Strtab) {} + + StringTableBuilder StrtabBuilder{StringTableBuilder::ELF}; + + BumpPtrAllocator Alloc; + StringSaver Saver{Alloc}; + + DenseMap ComdatMap; + ModuleSymbolTable Msymtab; + SmallPtrSet Used; + Mangler Mang; + Triple TT; + + std::vector Comdats; + std::vector Mods; + std::vector Syms; + std::vector Uncommons; + + std::string COFFLinkerOpts; + raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts}; + + void setStr(storage::Str &S, StringRef Value) { + S.Offset = StrtabBuilder.add(Value); + } + template + void writeRange(storage::Range &R, const std::vector &Objs) { + R.Offset = Symtab.size(); + R.Size = Objs.size(); + Symtab.insert(Symtab.end(), reinterpret_cast(Objs.data()), + reinterpret_cast(Objs.data() + Objs.size())); + } + + Error addModule(Module *M); + Error addSymbol(ModuleSymbolTable::Symbol Sym); + + Error build(ArrayRef Mods); +}; + +Error Builder::addModule(Module *M) { + collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false); + + storage::Module Mod; + Mod.Begin = Msymtab.symbols().size(); + Msymtab.addModule(M); + Mod.End = Msymtab.symbols().size(); + Mods.push_back(Mod); + + if (TT.isOSBinFormatCOFF()) { + if (auto E = M->materializeMetadata()) + return E; + if (Metadata *Val = M->getModuleFlag("Linker Options")) { + MDNode *LinkerOptions = cast(Val); + for (const MDOperand &MDOptions : LinkerOptions->operands()) + for (const MDOperand &MDOption : cast(MDOptions)->operands()) + COFFLinkerOptsOS << " " << cast(MDOption)->getString(); + } + } + + return Error::success(); +} + +Error Builder::addSymbol(ModuleSymbolTable::Symbol Msym) { + Syms.emplace_back(); + storage::Symbol &Sym = Syms.back(); + Sym = {}; + + Sym.UncommonIndex = -1; + storage::Uncommon *Unc = nullptr; + auto Uncommon = [&]() -> storage::Uncommon & { + if (Unc) + return *Unc; + Sym.UncommonIndex = Uncommons.size(); + Uncommons.emplace_back(); + Unc = &Uncommons.back(); + *Unc = {}; + setStr(Unc->COFFWeakExternFallbackName, ""); + return *Unc; + }; + + SmallString<64> Name; + { + raw_svector_ostream OS(Name); + Msymtab.printSymbolName(OS, Msym); + } + setStr(Sym.Name, Saver.save(StringRef(Name))); + + auto Flags = Msymtab.getSymbolFlags(Msym); + if (Flags & object::BasicSymbolRef::SF_Undefined) + Sym.Flags |= 1 << storage::Symbol::FB_undefined; + if (Flags & object::BasicSymbolRef::SF_Weak) + Sym.Flags |= 1 << storage::Symbol::FB_weak; + if (Flags & object::BasicSymbolRef::SF_Common) + Sym.Flags |= 1 << storage::Symbol::FB_common; + if (Flags & object::BasicSymbolRef::SF_Indirect) + Sym.Flags |= 1 << storage::Symbol::FB_indirect; + if (Flags & object::BasicSymbolRef::SF_Global) + Sym.Flags |= 1 << storage::Symbol::FB_global; + if (Flags & object::BasicSymbolRef::SF_FormatSpecific) + Sym.Flags |= 1 << storage::Symbol::FB_format_specific; + + Sym.ComdatIndex = -1; + auto *GV = Msym.dyn_cast(); + if (!GV) { + setStr(Sym.IRName, ""); + return Error::success(); + } + + setStr(Sym.IRName, GV->getName()); + + if (Used.count(GV)) + Sym.Flags |= 1 << storage::Symbol::FB_used; + if (GV->isThreadLocal()) + Sym.Flags |= 1 << storage::Symbol::FB_tls; + if (GV->hasGlobalUnnamedAddr()) + Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr; + if (canBeOmittedFromSymbolTable(GV)) + Sym.Flags |= 1 << storage::Symbol::FB_may_omit; + Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility; + + if (Flags & object::BasicSymbolRef::SF_Common) { + Uncommon().CommonSize = GV->getParent()->getDataLayout().getTypeAllocSize( + GV->getType()->getElementType()); + Uncommon().CommonAlign = GV->getAlignment(); + } + + const GlobalObject *Base = GV->getBaseObject(); + if (!Base) + return make_error("Unable to determine comdat of alias!", + inconvertibleErrorCode()); + if (const Comdat *C = Base->getComdat()) { + auto P = ComdatMap.insert(std::make_pair(C, Comdats.size())); + Sym.ComdatIndex = P.first->second; + + if (P.second) { + storage::Comdat Comdat; + setStr(Comdat.Name, C->getName()); + Comdats.push_back(Comdat); + } + } + + if (TT.isOSBinFormatCOFF()) { + emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang); + + if ((Flags & object::BasicSymbolRef::SF_Weak) && + (Flags & object::BasicSymbolRef::SF_Indirect)) { + std::string FallbackName; + raw_string_ostream OS(FallbackName); + Msymtab.printSymbolName( + OS, cast( + cast(GV)->getAliasee()->stripPointerCasts())); + OS.flush(); + setStr(Uncommon().COFFWeakExternFallbackName, Saver.save(FallbackName)); + } + } + + return Error::success(); +} + +Error Builder::build(ArrayRef IRMods) { + storage::Header Hdr; + + assert(!IRMods.empty()); + setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName()); + TT = Triple(IRMods[0]->getTargetTriple()); + + // This adds the symbols for each module to Msymtab. + for (auto *M : IRMods) + if (Error Err = addModule(M)) + return Err; + + for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) + if (Error Err = addSymbol(Msym)) + return Err; + + COFFLinkerOptsOS.flush(); + setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts); + + // We are about to fill in the header's range fields, so reserve space for it + // and copy it in afterwards. + Symtab.resize(sizeof(storage::Header)); + writeRange(Hdr.Modules, Mods); + writeRange(Hdr.Comdats, Comdats); + writeRange(Hdr.Symbols, Syms); + writeRange(Hdr.Uncommons, Uncommons); + + *reinterpret_cast(Symtab.data()) = Hdr; + + raw_svector_ostream OS(Strtab); + StrtabBuilder.finalizeInOrder(); + StrtabBuilder.write(OS); + + return Error::success(); +} + +} // anonymous namespace + +Error irsymtab::build(ArrayRef Mods, SmallVector &Symtab, + SmallVector &Strtab) { + return Builder(Symtab, Strtab).build(Mods); +} Index: llvm/trunk/tools/gold/gold-plugin.cpp =================================================================== --- llvm/trunk/tools/gold/gold-plugin.cpp +++ llvm/trunk/tools/gold/gold-plugin.cpp @@ -465,7 +465,7 @@ EC == object::object_error::bitcode_section_not_found) *claimed = 0; else - message(LDPL_ERROR, + message(LDPL_FATAL, "LLVM gold plugin has failed to create LTO module: %s", EI.message().c_str()); }); @@ -536,7 +536,7 @@ sym.size = 0; sym.comdat_key = nullptr; - int CI = check(Sym.getComdatIndex()); + int CI = Sym.getComdatIndex(); if (CI != -1) { StringRef C = Obj->getComdatTable()[CI]; sym.comdat_key = strdup(C.str().c_str());