Index: lld/COFF/InputFiles.cpp =================================================================== --- lld/COFF/InputFiles.cpp +++ lld/COFF/InputFiles.cpp @@ -343,27 +343,24 @@ MemoryBufferRef(MB.getBuffer(), Saver.save(MB.getBufferIdentifier())))); for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) { StringRef SymName = Saver.save(ObjSym.getName()); - auto Flags = ObjSym.getFlags(); Symbol *Sym; - if (Flags & object::BasicSymbolRef::SF_Undefined) { + if (ObjSym.isUndefined()) { Sym = Symtab->addUndefined(SymName, this, false); - } else if (Flags & object::BasicSymbolRef::SF_Common) { + } else if (ObjSym.isCommon()) { Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize()); - } else if ((Flags & object::BasicSymbolRef::SF_Weak) && - (Flags & object::BasicSymbolRef::SF_Indirect)) { + } else if (ObjSym.isWeak() && ObjSym.isIndirect()) { // Weak external. Sym = Symtab->addUndefined(SymName, this, true); std::string Fallback = ObjSym.getCOFFWeakExternalFallback(); SymbolBody *Alias = Symtab->addUndefined(Saver.save(Fallback)); checkAndSetWeakAlias(Symtab, this, Sym->body(), Alias); } else { - Expected ComdatIndex = ObjSym.getComdatIndex(); - bool IsCOMDAT = ComdatIndex && *ComdatIndex != -1; + bool IsCOMDAT = ObjSym.getComdatIndex() != -1; Sym = Symtab->addRegular(this, SymName, IsCOMDAT); } SymbolBodies.push_back(Sym->body()); } - Directives = check(Obj->getLinkerOpts()); + Directives = Obj->getCOFFLinkerOpts(); } MachineTypes BitcodeFile::getMachineType() { Index: lld/COFF/LTO.cpp =================================================================== --- lld/COFF/LTO.cpp +++ lld/COFF/LTO.cpp @@ -105,9 +105,7 @@ // flags an undefined in IR with a definition in ASM as prevailing. // Once IRObjectFile is fixed to report only one symbol this hack can // be removed. - R.Prevailing = - !(ObjSym.getFlags() & object::BasicSymbolRef::SF_Undefined) && - B->getFile() == &F; + R.Prevailing = !ObjSym.isUndefined() && B->getFile() == &F; R.VisibleToRegularObj = Sym->IsUsedInRegularObj; if (R.Prevailing) undefine(Sym); Index: lld/ELF/InputFiles.cpp =================================================================== --- lld/ELF/InputFiles.cpp +++ lld/ELF/InputFiles.cpp @@ -792,25 +792,24 @@ const lto::InputFile::Symbol &ObjSym, BitcodeFile *F) { StringRef NameRef = Saver.save(ObjSym.getName()); - uint32_t Flags = ObjSym.getFlags(); - uint32_t Binding = (Flags & BasicSymbolRef::SF_Weak) ? STB_WEAK : STB_GLOBAL; + uint32_t Binding = ObjSym.isWeak() ? STB_WEAK : STB_GLOBAL; uint8_t Type = ObjSym.isTLS() ? STT_TLS : STT_NOTYPE; uint8_t Visibility = mapVisibility(ObjSym.getVisibility()); bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable(); - int C = check(ObjSym.getComdatIndex()); + int C = ObjSym.getComdatIndex(); if (C != -1 && !KeptComdats[C]) return Symtab::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, Visibility, Type, CanOmitFromDynSym, F); - if (Flags & BasicSymbolRef::SF_Undefined) + if (ObjSym.isUndefined()) return Symtab::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, Visibility, Type, CanOmitFromDynSym, F); - if (Flags & BasicSymbolRef::SF_Common) + if (ObjSym.isCommon()) return Symtab::X->addCommon(NameRef, ObjSym.getCommonSize(), ObjSym.getCommonAlignment(), Binding, Visibility, STT_OBJECT, F); @@ -835,10 +834,8 @@ utostr(OffsetInArchive))))); std::vector KeptComdats; - for (StringRef S : Obj->getComdatTable()) { - StringRef N = Saver.save(S); - KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(N)).second); - } + for (StringRef S : Obj->getComdatTable()) + KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(S)).second); for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) Symbols.push_back(createBitcodeSymbol(KeptComdats, ObjSym, this)); @@ -956,7 +953,7 @@ std::unique_ptr Obj = check(lto::InputFile::create(this->MB)); std::vector V; for (const lto::InputFile::Symbol &Sym : Obj->symbols()) - if (!(Sym.getFlags() & BasicSymbolRef::SF_Undefined)) + if (!Sym.isUndefined()) V.push_back(Saver.save(Sym.getName())); return V; } Index: lld/ELF/LTO.cpp =================================================================== --- lld/ELF/LTO.cpp +++ lld/ELF/LTO.cpp @@ -125,9 +125,7 @@ // flags an undefined in IR with a definition in ASM as prevailing. // Once IRObjectFile is fixed to report only one symbol this hack can // be removed. - R.Prevailing = - !(ObjSym.getFlags() & object::BasicSymbolRef::SF_Undefined) && - B->File == &F; + R.Prevailing = !ObjSym.isUndefined() && B->File == &F; R.VisibleToRegularObj = Sym->IsUsedInRegularObj || (R.Prevailing && Sym->includeInDynsym()); Index: llvm/include/llvm/LTO/LTO.h =================================================================== --- llvm/include/llvm/LTO/LTO.h +++ llvm/include/llvm/LTO/LTO.h @@ -24,7 +24,7 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/LTO/Config.h" #include "llvm/Linker/IRMover.h" -#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/IRSymtab.h" #include "llvm/Support/Error.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/thread.h" @@ -86,14 +86,9 @@ friend LTO; InputFile() = default; - // FIXME: Remove the LLVMContext once we have bitcode symbol tables. - LLVMContext Ctx; - struct InputModule; - std::vector Mods; - ModuleSymbolTable SymTab; - - std::vector Comdats; - DenseMap ComdatMap; + std::vector Mods; + SmallVector SymtabData, StrtabData; + irsymtab::Reader SymTab; public: ~InputFile(); @@ -103,7 +98,7 @@ class symbol_iterator; - /// This is a wrapper for ArrayRef::iterator that + /// This is a wrapper for ArrayRef::iterator that /// exposes only the information that an LTO client should need in order to do /// symbol resolution. /// @@ -113,98 +108,62 @@ friend symbol_iterator; friend LTO; - ArrayRef::iterator I; - const ModuleSymbolTable &SymTab; + ArrayRef::iterator I; + const irsymtab::Reader &SymTab; const InputFile *File; - uint32_t Flags; - SmallString<64> Name; - bool shouldSkip() { - return !(Flags & object::BasicSymbolRef::SF_Global) || - (Flags & object::BasicSymbolRef::SF_FormatSpecific); - } + bool shouldSkip() { return !I->isGlobal() || I->isFormatSpecific(); } void skip() { - ArrayRef::iterator E = SymTab.symbols().end(); + ArrayRef::iterator E = SymTab.symbols().end(); while (I != E) { - Flags = SymTab.getSymbolFlags(*I); if (!shouldSkip()) break; ++I; } - if (I == E) - return; - - Name.clear(); - { - raw_svector_ostream OS(Name); - SymTab.printSymbolName(OS, *I); - } } - bool isGV() const { return I->is(); } - GlobalValue *getGV() const { return I->get(); } - public: - Symbol(ArrayRef::iterator I, - const ModuleSymbolTable &SymTab, const InputFile *File) + Symbol(ArrayRef::iterator I, + const irsymtab::Reader &SymTab, const InputFile *File) : I(I), SymTab(SymTab), File(File) { skip(); } - /// For COFF weak externals, returns the name of the symbol that is used - /// as a fallback if the weak external remains undefined. - std::string getCOFFWeakExternalFallback() const { - assert((Flags & object::BasicSymbolRef::SF_Weak) && - (Flags & object::BasicSymbolRef::SF_Indirect) && - "symbol is not a weak external"); - std::string Name; - raw_string_ostream OS(Name); - SymTab.printSymbolName( - OS, - cast( - cast(getGV())->getAliasee()->stripPointerCasts())); - OS.flush(); - return Name; - } + bool isUndefined() const { return I->isUndefined(); } + bool isCommon() const { return I->isCommon(); } + bool isWeak() const { return I->isWeak(); } + bool isIndirect() const { return I->isIndirect(); } /// Returns the mangled name of the global. - StringRef getName() const { return Name; } + StringRef getName() const { return SymTab.str(I->Name); } - uint32_t getFlags() const { return Flags; } GlobalValue::VisibilityTypes getVisibility() const { - if (isGV()) - return getGV()->getVisibility(); - return GlobalValue::DefaultVisibility; + return I->getVisibility(); } bool canBeOmittedFromSymbolTable() const { - return isGV() && llvm::canBeOmittedFromSymbolTable(getGV()); - } - bool isTLS() const { - // FIXME: Expose a thread-local flag for module asm symbols. - return isGV() && getGV()->isThreadLocal(); + return I->canBeOmittedFromSymbolTable(); } + bool isTLS() const { return I->isTLS(); } // Returns the index of the comdat this symbol is in or -1 if the symbol // is not in a comdat. - // FIXME: We have to return Expected because aliases point to an - // arbitrary ConstantExpr and that might not actually be a constant. That - // means we might not be able to find what an alias is aliased to and - // so find its comdat. - Expected getComdatIndex() const; + int getComdatIndex() const { return I->ComdatIndex; } uint64_t getCommonSize() const { - assert(Flags & object::BasicSymbolRef::SF_Common); - if (!isGV()) - return 0; - return getGV()->getParent()->getDataLayout().getTypeAllocSize( - getGV()->getType()->getElementType()); + assert(I->isCommon()); + return SymTab.uncommon(*I).CommonSize; } unsigned getCommonAlignment() const { - assert(Flags & object::BasicSymbolRef::SF_Common); - if (!isGV()) - return 0; - return getGV()->getAlignment(); + assert(I->isCommon()); + return SymTab.uncommon(*I).CommonAlign; + } + + /// For COFF weak externals, returns the name of the symbol that is used + /// as a fallback if the weak external remains undefined. + StringRef getCOFFWeakExternalFallback() const { + assert(I->isWeak() && I->isIndirect()); + return SymTab.str(SymTab.uncommon(*I).COFFWeakExternFallbackName); } }; @@ -212,8 +171,8 @@ Symbol Sym; public: - symbol_iterator(ArrayRef::iterator I, - const ModuleSymbolTable &SymTab, const InputFile *File) + symbol_iterator(ArrayRef::iterator I, + const irsymtab::Reader &SymTab, const InputFile *File) : Sym(I, SymTab, File) {} symbol_iterator &operator++() { @@ -244,7 +203,9 @@ } /// Returns linker options specified in the input file. - Expected getLinkerOpts(); + StringRef getCOFFLinkerOpts() { + return SymTab.str(SymTab.header().COFFLinkerOpts); + } /// Returns the path to the InputFile. StringRef getName() const; @@ -253,10 +214,15 @@ StringRef getSourceFileName() const; // Returns a table with all the comdats used by this file. - ArrayRef getComdatTable() const { return Comdats; } + std::vector getComdatTable() const { + std::vector R; + for (auto C : SymTab.comdats()) + R.push_back(SymTab.str(C.Name)); + return R; + } private: - iterator_range module_symbols(InputModule &IM); + iterator_range module_symbols(unsigned I); }; /// This class wraps an output stream for a native object. Most clients should @@ -444,19 +410,20 @@ // Global mapping from mangled symbol names to resolutions. StringMap GlobalResolutions; - void addSymbolToGlobalRes(SmallPtrSet &Used, - const InputFile::Symbol &Sym, SymbolResolution Res, + void addSymbolToGlobalRes(const InputFile::Symbol &Sym, SymbolResolution Res, unsigned Partition); // These functions take a range of symbol resolutions [ResI, ResE) and consume // the resolutions used by a single input module by incrementing ResI. After // these functions return, [ResI, ResE) will refer to the resolution range for // the remaining modules in the InputFile. - Error addModule(InputFile &Input, InputFile::InputModule &IM, + Error addModule(InputFile &Input, unsigned ModI, const SymbolResolution *&ResI, const SymbolResolution *ResE); - Error addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI, + Error addRegularLTO(BitcodeModule BM, + iterator_range Syms, + const SymbolResolution *&ResI, const SymbolResolution *ResE); - Error addThinLTO(BitcodeModule BM, Module &M, + Error addThinLTO(BitcodeModule BM, iterator_range Syms, const SymbolResolution *&ResI, const SymbolResolution *ResE); Index: llvm/include/llvm/Object/IRSymtab.h =================================================================== --- /dev/null +++ llvm/include/llvm/Object/IRSymtab.h @@ -0,0 +1,170 @@ +//===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains data definitions for a symbol table for LLVM IR. It is +// designed to be easy to consume and serve as a serialization format for the +// symbol table. Its purpose is to allow linkers and other consumers of bitcode +// files to efficiently read the symbol table for symbol resolution purposes +// without needing to construct a module in memory. +// +// As with most object files the symbol table has two parts: the symbol table +// itself and a string table which is referenced by the symbol table. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_IRSYMTAB_H +#define LLVM_OBJECT_IRSYMTAB_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Support/Endian.h" + +namespace llvm { +namespace irsymtab { + +typedef support::ulittle32_t Word; + +/// A reference to a string in the string table. +struct Str { + Word Offset; + StringRef get(StringRef Strtab) const { + return Strtab.data() + Offset; + } +}; + +/// A reference to a range of objects in the symbol table. +template struct Range { + Word Offset, Size; + ArrayRef get(StringRef Symtab) const { + return {reinterpret_cast(Symtab.data() + Offset), Size}; + } +}; + +/// Describes the range of a particular module's symbols within the symbol +/// table. +struct Module { + Word Begin, End; +}; + +/// This is equivalent to an IR comdat. +struct Comdat { + Str Name; +}; + +/// Contains the information needed by linkers for symbol resolution, as well as +/// by the LTO implementation itself. +struct Symbol { + /// The mangled symbol name. + Str Name; + + /// The unmangled symbol name, or the empty string if this is not an IR + /// symbol. + Str IRName; + + /// The index into Header::Comdats, or -1 if not a comdat member. + Word ComdatIndex; + + Word Flags; + enum FlagBits { + FB_visibility, // 2 bits + FB_undefined = FB_visibility + 2, + FB_weak, + FB_common, + FB_indirect, + FB_used, + FB_tls, + FB_may_omit, + FB_global, + FB_format_specific, + FB_unnamed_addr, + }; + GlobalValue::VisibilityTypes getVisibility() const { + return GlobalValue::VisibilityTypes((Flags >> FB_visibility) & 3); + } + bool isUndefined() const { return (Flags >> FB_undefined) & 1; } + bool isWeak() const { return (Flags >> FB_weak) & 1; } + bool isCommon() const { return (Flags >> FB_common) & 1; } + bool isIndirect() const { return (Flags >> FB_indirect) & 1; } + bool isUsed() const { return (Flags >> FB_used) & 1; } + bool isTLS() const { return (Flags >> FB_tls) & 1; } + bool canBeOmittedFromSymbolTable() const { return (Flags >> FB_may_omit) & 1; } + bool isGlobal() const { return (Flags >> FB_global) & 1; } + bool isFormatSpecific() const { return (Flags >> FB_format_specific) & 1; } + bool isUnnamedAddr() const { return (Flags >> FB_unnamed_addr) & 1; } + + /// The index into the Uncommon table, or -1 if this symbol does not have an + /// Uncommon. + Word UncommonIndex; +}; + +/// This data structure contains rarely used symbol fields and is optionally +/// referenced by a Symbol. +struct Uncommon { + Word CommonSize, CommonAlign; + + /// COFF-specific: the name of the symbol that a weak external resolves to + /// if not defined. + Str COFFWeakExternFallbackName; +}; + +struct Header { + /// The producer's version string (LLVM_VERSION_STRING). Consumers should + /// rebuild the symbol table from IR if the producer's version does not match + /// the consumer's version due to potential differences in symbol table + /// format, symbol enumeration order and so on. + Str Producer; + + Range Modules; + Range Comdats; + Range Symbols; + Range Uncommons; + + Str SourceFileName; + + /// COFF-specific: linker directives. + Str COFFLinkerOpts; +}; + +struct Reader { + StringRef Symtab, Strtab; + + Reader() = default; + Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) {} + + const Header &header() const { + return *reinterpret_cast(Symtab.data()); + } + + StringRef str(Str S) const { return S.get(Strtab); } + + ArrayRef modules() const { return range(header().Modules); } + ArrayRef comdats() const { return range(header().Comdats); } + ArrayRef symbols() const { return range(header().Symbols); } + ArrayRef uncommons() const { return range(header().Uncommons); } + + const Uncommon &uncommon(const Symbol &Sym) const { + assert(Sym.UncommonIndex != -1u); + return uncommons()[Sym.UncommonIndex]; + } + +private: + template ArrayRef range(Range R) const { + return R.get(Symtab); + } +}; + +} + +/// Fills in Symtab and Strtab with a valid symbol and string table for Mods. +Error writeSymtab(ArrayRef Mods, SmallVector &Symtab, + SmallVector &Strtab); + +} + +#endif Index: llvm/lib/LTO/LTO.cpp =================================================================== --- llvm/lib/LTO/LTO.cpp +++ llvm/lib/LTO/LTO.cpp @@ -25,6 +25,8 @@ #include "llvm/IR/Metadata.h" #include "llvm/LTO/LTOBackend.h" #include "llvm/Linker/IRMover.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/Error.h" #include "llvm/Support/ManagedStatic.h" @@ -32,6 +34,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/StringSaver.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" @@ -304,14 +307,6 @@ thinLTOInternalizeAndPromoteGUID(I.second, I.first, isExported); } -struct InputFile::InputModule { - BitcodeModule BM; - std::unique_ptr Mod; - - // The range of ModuleSymbolTable entries for this input module. - size_t SymBegin, SymEnd; -}; - // Requires a destructor for std::vector. InputFile::~InputFile() = default; @@ -332,87 +327,50 @@ return make_error("Bitcode file does not contain any modules", inconvertibleErrorCode()); - // Create an InputModule for each module in the InputFile, and add it to the - // ModuleSymbolTable. + File->Mods = *BMsOrErr; + + LLVMContext Ctx; + std::vector Mods; + std::vector> OwnedMods; for (auto BM : *BMsOrErr) { Expected> MOrErr = - BM.getLazyModule(File->Ctx, /*ShouldLazyLoadMetadata*/ true, + BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true, /*IsImporting*/ false); if (!MOrErr) return MOrErr.takeError(); - size_t SymBegin = File->SymTab.symbols().size(); - File->SymTab.addModule(MOrErr->get()); - size_t SymEnd = File->SymTab.symbols().size(); + if ((*MOrErr)->getDataLayoutStr().empty()) + return make_error("input module has no datalayout", + inconvertibleErrorCode()); - for (const auto &C : (*MOrErr)->getComdatSymbolTable()) { - auto P = File->ComdatMap.insert( - std::make_pair(&C.second, File->Comdats.size())); - assert(P.second); - (void)P; - File->Comdats.push_back(C.first()); - } - - File->Mods.push_back({BM, std::move(*MOrErr), SymBegin, SymEnd}); + Mods.push_back(MOrErr->get()); + OwnedMods.push_back(std::move(*MOrErr)); } - return std::move(File); -} + if (Error E = writeSymtab(Mods, File->SymtabData, File->StrtabData)) + return std::move(E); -Expected InputFile::Symbol::getComdatIndex() const { - if (!isGV()) - return -1; - const GlobalObject *GO = getGV()->getBaseObject(); - if (!GO) - return make_error("Unable to determine comdat of alias!", - inconvertibleErrorCode()); - if (const Comdat *C = GO->getComdat()) { - auto I = File->ComdatMap.find(C); - assert(I != File->ComdatMap.end()); - return I->second; - } - return -1; -} - -Expected InputFile::getLinkerOpts() { - std::string LinkerOpts; - raw_string_ostream LOS(LinkerOpts); - // Extract linker options from module metadata. - for (InputModule &Mod : Mods) { - std::unique_ptr &M = Mod.Mod; - if (auto E = M->materializeMetadata()) - return std::move(E); - if (Metadata *Val = M->getModuleFlag("Linker Options")) { - MDNode *LinkerOptions = cast(Val); - for (const MDOperand &MDOptions : LinkerOptions->operands()) - for (const MDOperand &MDOption : cast(MDOptions)->operands()) - LOS << " " << cast(MDOption)->getString(); - } - } - - // Synthesize export flags for symbols with dllexport storage. - const Triple TT(Mods[0].Mod->getTargetTriple()); - Mangler M; - for (const ModuleSymbolTable::Symbol &Sym : SymTab.symbols()) - if (auto *GV = Sym.dyn_cast()) - emitLinkerFlagsForGlobalCOFF(LOS, GV, TT, M); - LOS.flush(); - return LinkerOpts; + File->SymTab = + irsymtab::Reader({File->SymtabData.data(), File->SymtabData.size()}, + {File->StrtabData.data(), File->StrtabData.size()}); + return std::move(File); } StringRef InputFile::getName() const { - return Mods[0].BM.getModuleIdentifier(); + return Mods[0].getModuleIdentifier(); } StringRef InputFile::getSourceFileName() const { - return Mods[0].Mod->getSourceFileName(); + return SymTab.str(SymTab.header().SourceFileName); } iterator_range -InputFile::module_symbols(InputModule &IM) { +InputFile::module_symbols(unsigned I) { return llvm::make_range( - symbol_iterator(SymTab.symbols().data() + IM.SymBegin, SymTab, this), - symbol_iterator(SymTab.symbols().data() + IM.SymEnd, SymTab, this)); + symbol_iterator(SymTab.symbols().data() + SymTab.modules()[I].Begin, + SymTab, this), + symbol_iterator(SymTab.symbols().data() + SymTab.modules()[I].End, SymTab, + this)); } LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel, @@ -436,21 +394,17 @@ LTO::~LTO() = default; // Add the given symbol to the GlobalResolutions map, and resolve its partition. -void LTO::addSymbolToGlobalRes(SmallPtrSet &Used, - const InputFile::Symbol &Sym, +void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym, SymbolResolution Res, unsigned Partition) { - GlobalValue *GV = Sym.isGV() ? Sym.getGV() : nullptr; - auto &GlobalRes = GlobalResolutions[Sym.getName()]; - if (GV) { - GlobalRes.UnnamedAddr &= GV->hasGlobalUnnamedAddr(); - if (Res.Prevailing) - GlobalRes.IRName = GV->getName(); - } + GlobalRes.UnnamedAddr &= Sym.I->isUnnamedAddr(); + if (Res.Prevailing) + GlobalRes.IRName = Sym.SymTab.str(Sym.I->IRName); + // Set the partition to external if we know it is used elsewhere, e.g. // it is visible to a regular object, is referenced from llvm.compiler_used, // or was already recorded as being referenced from a different partition. - if (Res.VisibleToRegularObj || (GV && Used.count(GV)) || + if (Res.VisibleToRegularObj || Sym.I->isUsed() || (GlobalRes.Partition != GlobalResolution::Unknown && GlobalRes.Partition != Partition)) { GlobalRes.Partition = GlobalResolution::External; @@ -494,41 +448,32 @@ writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res); const SymbolResolution *ResI = Res.begin(); - for (InputFile::InputModule &IM : Input->Mods) - if (Error Err = addModule(*Input, IM, ResI, Res.end())) + for (unsigned I = 0; I != Input->Mods.size(); ++I) + if (Error Err = addModule(*Input, I, ResI, Res.end())) return Err; assert(ResI == Res.end()); return Error::success(); } -Error LTO::addModule(InputFile &Input, InputFile::InputModule &IM, +Error LTO::addModule(InputFile &Input, unsigned ModI, const SymbolResolution *&ResI, const SymbolResolution *ResE) { - // FIXME: move to backend - Module &M = *IM.Mod; - - if (M.getDataLayoutStr().empty()) - return make_error("input module has no datalayout", - inconvertibleErrorCode()); - - if (!Conf.OverrideTriple.empty()) - M.setTargetTriple(Conf.OverrideTriple); - else if (M.getTargetTriple().empty()) - M.setTargetTriple(Conf.DefaultTriple); - - Expected HasThinLTOSummary = IM.BM.hasSummary(); + Expected HasThinLTOSummary = Input.Mods[ModI].hasSummary(); if (!HasThinLTOSummary) return HasThinLTOSummary.takeError(); + auto ModSyms = Input.module_symbols(ModI); if (*HasThinLTOSummary) - return addThinLTO(IM.BM, M, Input.module_symbols(IM), ResI, ResE); + return addThinLTO(Input.Mods[ModI], ModSyms, ResI, ResE); else - return addRegularLTO(IM.BM, ResI, ResE); + return addRegularLTO(Input.Mods[ModI], ModSyms, ResI, ResE); } // Add a regular LTO object to the link. -Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI, +Error LTO::addRegularLTO(BitcodeModule BM, + iterator_range Syms, + const SymbolResolution *&ResI, const SymbolResolution *ResE) { if (!RegularLTO.CombinedModule) { RegularLTO.CombinedModule = @@ -549,9 +494,6 @@ ModuleSymbolTable SymTab; SymTab.addModule(&M); - SmallPtrSet Used; - collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false); - std::vector Keep; for (GlobalVariable &GV : M.globals()) @@ -563,19 +505,29 @@ if (GlobalObject *GO = GA.getBaseObject()) AliasedGlobals.insert(GO); - for (const InputFile::Symbol &Sym : - make_range(InputFile::symbol_iterator(SymTab.symbols().begin(), SymTab, - nullptr), - InputFile::symbol_iterator(SymTab.symbols().end(), SymTab, - nullptr))) { + auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end(); + auto Skip = [&]() { + while (MsymI != MsymE) { + auto Flags = SymTab.getSymbolFlags(*MsymI); + if ((Flags & object::BasicSymbolRef::SF_Global) && + !(Flags & object::BasicSymbolRef::SF_FormatSpecific)) + return; + ++MsymI; + } + }; + Skip(); + + for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; - addSymbolToGlobalRes(Used, Sym, Res, 0); + addSymbolToGlobalRes(Sym, Res, 0); + + ModuleSymbolTable::Symbol Msym = *MsymI++; + Skip(); - if (Sym.isGV()) { - GlobalValue *GV = Sym.getGV(); + if (GlobalValue *GV = Msym.dyn_cast()) { if (Res.Prevailing) { - if (Sym.getFlags() & object::BasicSymbolRef::SF_Undefined) + if (GV->isDeclarationForLinker()) continue; Keep.push_back(GV); switch (GV->getLinkage()) { @@ -608,10 +560,11 @@ // Common resolution: collect the maximum size/alignment over all commons. // We also record if we see an instance of a common as prevailing, so that // if none is prevailing we can ignore it later. - if (Sym.getFlags() & object::BasicSymbolRef::SF_Common) { + if (Sym.isCommon()) { // FIXME: We should figure out what to do about commons defined by asm. // For now they aren't reported correctly by ModuleSymbolTable. - auto &CommonRes = RegularLTO.Commons[Sym.getGV()->getName()]; + auto &CommonRes = + RegularLTO.Commons[Msym.get()->getName()]; CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize()); CommonRes.Align = std::max(CommonRes.Align, Sym.getCommonAlignment()); CommonRes.Prevailing |= Res.Prevailing; @@ -619,6 +572,7 @@ // FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit. } + assert(MsymI == MsymE); return RegularLTO.Mover->move(std::move(*MOrErr), Keep, [](GlobalValue &, IRMover::ValueAdder) {}, @@ -626,15 +580,10 @@ } // Add a ThinLTO object to the link. -// FIXME: This function should not need to take as many parameters once we have -// a bitcode symbol table. -Error LTO::addThinLTO(BitcodeModule BM, Module &M, +Error LTO::addThinLTO(BitcodeModule BM, iterator_range Syms, const SymbolResolution *&ResI, const SymbolResolution *ResE) { - SmallPtrSet Used; - collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false); - Expected> SummaryOrErr = BM.getSummary(); if (!SummaryOrErr) return SummaryOrErr.takeError(); @@ -644,11 +593,16 @@ for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; - addSymbolToGlobalRes(Used, Sym, Res, ThinLTO.ModuleMap.size() + 1); - - if (Res.Prevailing && Sym.isGV()) - ThinLTO.PrevailingModuleForGUID[Sym.getGV()->getGUID()] = - BM.getModuleIdentifier(); + addSymbolToGlobalRes(Sym, Res, ThinLTO.ModuleMap.size() + 1); + + if (Res.Prevailing) { + StringRef IRName = Sym.SymTab.str(Sym.I->IRName); + if (!IRName.empty()) { + auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier( + IRName, GlobalValue::ExternalLinkage, "")); + ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier(); + } + } } if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second) Index: llvm/lib/LTO/LTOBackend.cpp =================================================================== --- llvm/lib/LTO/LTOBackend.cpp +++ llvm/lib/LTO/LTOBackend.cpp @@ -27,6 +27,7 @@ #include "llvm/LTO/LTO.h" #include "llvm/LTO/legacy/UpdateCompilerUsed.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" Index: llvm/lib/Object/CMakeLists.txt =================================================================== --- llvm/lib/Object/CMakeLists.txt +++ llvm/lib/Object/CMakeLists.txt @@ -8,6 +8,7 @@ ELFObjectFile.cpp Error.cpp IRObjectFile.cpp + IRSymtab.cpp MachOObjectFile.cpp MachOUniversal.cpp ModuleSummaryIndexObjectFile.cpp Index: llvm/lib/Object/IRSymtab.cpp =================================================================== --- /dev/null +++ llvm/lib/Object/IRSymtab.cpp @@ -0,0 +1,231 @@ +//===- IRSymtab.cpp - implementation of IR symbol tables --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/IRSymtab.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/StringSaver.h" + +using namespace llvm; + +namespace { + +struct IRSymtabWriter { + SmallVector &Symtab; + SmallVector &Strtab; + IRSymtabWriter(SmallVector &Symtab, SmallVector &Strtab) + : Symtab(Symtab), Strtab(Strtab) {} + + StringTableBuilder StrtabBuilder{StringTableBuilder::ELF}; + + BumpPtrAllocator Alloc; + StringSaver Saver{Alloc}; + + DenseMap ComdatMap; + ModuleSymbolTable Msymtab; + SmallPtrSet Used; + Mangler Mang; + Triple TT; + + std::vector Comdats; + std::vector Mods; + std::vector Syms; + std::vector Uncommons; + + std::string COFFLinkerOpts; + raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts}; + + void setStr(irsymtab::Str &S, StringRef Value) { + S.Offset = StrtabBuilder.add(Value); + } + template + void writeRange(irsymtab::Range &R, const std::vector &Objs) { + R.Offset = Symtab.size(); + R.Size = Objs.size(); + Symtab.insert(Symtab.end(), reinterpret_cast(Objs.data()), + reinterpret_cast(Objs.data() + Objs.size())); + } + + Error addModule(Module *M); + Error addSymbol(ModuleSymbolTable::Symbol Sym); + + Error write(ArrayRef Mods); +}; + +Error IRSymtabWriter::addModule(Module *M) { + collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false); + + irsymtab::Module Mod; + Mod.Begin = Msymtab.symbols().size(); + Msymtab.addModule(M); + Mod.End = Msymtab.symbols().size(); + Mods.push_back(Mod); + + for (const auto &C : M->getComdatSymbolTable()) { + auto P = ComdatMap.insert(std::make_pair(&C.second, Comdats.size())); + assert(P.second); + (void)P; + + irsymtab::Comdat Comdat; + setStr(Comdat.Name, C.first()); + Comdats.push_back(Comdat); + } + + if (TT.isOSBinFormatCOFF()) { + if (auto E = M->materializeMetadata()) + return E; + if (Metadata *Val = M->getModuleFlag("Linker Options")) { + MDNode *LinkerOptions = cast(Val); + for (const MDOperand &MDOptions : LinkerOptions->operands()) + for (const MDOperand &MDOption : cast(MDOptions)->operands()) + COFFLinkerOptsOS << " " << cast(MDOption)->getString(); + } + } + + return Error::success(); +} + +Error IRSymtabWriter::addSymbol(ModuleSymbolTable::Symbol Msym) { + Syms.emplace_back(); + irsymtab::Symbol &Sym = Syms.back(); + Sym = {}; + + Sym.UncommonIndex = -1; + irsymtab::Uncommon *Unc = nullptr; + auto Uncommon = [&]() -> irsymtab::Uncommon & { + if (Unc) + return *Unc; + Sym.UncommonIndex = Uncommons.size(); + Uncommons.emplace_back(); + Unc = &Uncommons.back(); + *Unc = {}; + setStr(Unc->COFFWeakExternFallbackName, ""); + return *Unc; + }; + + SmallString<64> Name; + { + raw_svector_ostream OS(Name); + Msymtab.printSymbolName(OS, Msym); + } + setStr(Sym.Name, Saver.save(StringRef(Name))); + + auto Flags = Msymtab.getSymbolFlags(Msym); + if (Flags & object::BasicSymbolRef::SF_Undefined) + Sym.Flags |= 1 << irsymtab::Symbol::FB_undefined; + if (Flags & object::BasicSymbolRef::SF_Weak) + Sym.Flags |= 1 << irsymtab::Symbol::FB_weak; + if (Flags & object::BasicSymbolRef::SF_Common) + Sym.Flags |= 1 << irsymtab::Symbol::FB_common; + if (Flags & object::BasicSymbolRef::SF_Indirect) + Sym.Flags |= 1 << irsymtab::Symbol::FB_indirect; + if (Flags & object::BasicSymbolRef::SF_Global) + Sym.Flags |= 1 << irsymtab::Symbol::FB_global; + if (Flags & object::BasicSymbolRef::SF_FormatSpecific) + Sym.Flags |= 1 << irsymtab::Symbol::FB_format_specific; + + Sym.ComdatIndex = -1; + auto *GV = Msym.dyn_cast(); + if (!GV) { + setStr(Sym.IRName, ""); + return Error::success(); + } + + setStr(Sym.IRName, GV->getName()); + + if (Used.count(GV)) + Sym.Flags |= 1 << irsymtab::Symbol::FB_used; + if (GV->isThreadLocal()) + Sym.Flags |= 1 << irsymtab::Symbol::FB_tls; + if (GV->hasGlobalUnnamedAddr()) + Sym.Flags |= 1 << irsymtab::Symbol::FB_unnamed_addr; + if (canBeOmittedFromSymbolTable(GV)) + Sym.Flags |= 1 << irsymtab::Symbol::FB_may_omit; + Sym.Flags |= unsigned(GV->getVisibility()) << irsymtab::Symbol::FB_visibility; + + if (Sym.isCommon()) { + Uncommon().CommonSize = GV->getParent()->getDataLayout().getTypeAllocSize( + GV->getType()->getElementType()); + Uncommon().CommonAlign = GV->getAlignment(); + } + + const GlobalObject *Base = GV->getBaseObject(); + if (!Base) + return make_error("Unable to determine comdat of alias!", + inconvertibleErrorCode()); + if (const Comdat *C = Base->getComdat()) { + auto I = ComdatMap.find(C); + assert(I != ComdatMap.end()); + Sym.ComdatIndex = I->second; + } + + if (TT.isOSBinFormatCOFF()) { + emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang); + + if (Sym.isWeak() && Sym.isIndirect()) { + std::string FallbackName; + raw_string_ostream OS(FallbackName); + Msymtab.printSymbolName( + OS, cast( + cast(GV)->getAliasee()->stripPointerCasts())); + OS.flush(); + setStr(Uncommon().COFFWeakExternFallbackName, Saver.save(FallbackName)); + } + } + + return Error::success(); +} + +Error IRSymtabWriter::write(ArrayRef IRMods) { + irsymtab::Header Hdr; + setStr(Hdr.Producer, LLVM_VERSION_STRING); + + assert(!IRMods.empty()); + setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName()); + TT = Triple(IRMods[0]->getTargetTriple()); + + // This adds the symbols for each module to Msymtab. + for (auto *M : IRMods) + if (Error Err = addModule(M)) + return Err; + + for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) + if (Error Err = addSymbol(Msym)) + return Err; + + COFFLinkerOptsOS.flush(); + setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts); + + // We are about to fill in the header's range fields, so reserve space for it + // and copy it in afterwards. + Symtab.resize(sizeof(irsymtab::Header)); + writeRange(Hdr.Modules, Mods); + writeRange(Hdr.Comdats, Comdats); + writeRange(Hdr.Symbols, Syms); + writeRange(Hdr.Uncommons, Uncommons); + + *reinterpret_cast(Symtab.data()) = Hdr; + + raw_svector_ostream OS(Strtab); + StrtabBuilder.finalizeInOrder(); + StrtabBuilder.write(OS); + + return Error::success(); +} + +} // anonymous namespace + +Error llvm::writeSymtab(ArrayRef Mods, SmallVector &Symtab, + SmallVector &Strtab) { + return IRSymtabWriter(Symtab, Strtab).write(Mods); +} Index: llvm/tools/gold/gold-plugin.cpp =================================================================== --- llvm/tools/gold/gold-plugin.cpp +++ llvm/tools/gold/gold-plugin.cpp @@ -21,7 +21,9 @@ #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/LTO/Caching.h" #include "llvm/LTO/LTO.h" +#include "llvm/Object/Error.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -463,7 +465,7 @@ EC == object::object_error::bitcode_section_not_found) *claimed = 0; else - message(LDPL_ERROR, + message(LDPL_FATAL, "LLVM gold plugin has failed to create LTO module: %s", EI.message().c_str()); }); @@ -495,9 +497,8 @@ cf.name += ".llvm." + std::to_string(file->offset) + "." + sys::path::filename(Obj->getSourceFileName()).str(); + std::vector ComdatTable = Obj->getComdatTable(); for (auto &Sym : Obj->symbols()) { - uint32_t Symflags = Sym.getFlags(); - cf.syms.push_back(ld_plugin_symbol()); ld_plugin_symbol &sym = cf.syms.back(); sym.version = nullptr; @@ -523,22 +524,22 @@ break; } - if (Symflags & object::BasicSymbolRef::SF_Undefined) { + if (Sym.isUndefined()) { sym.def = LDPK_UNDEF; - if (Symflags & object::BasicSymbolRef::SF_Weak) + if (Sym.isWeak()) sym.def = LDPK_WEAKUNDEF; - } else if (Symflags & object::BasicSymbolRef::SF_Common) + } else if (Sym.isCommon()) sym.def = LDPK_COMMON; - else if (Symflags & object::BasicSymbolRef::SF_Weak) + else if (Sym.isWeak()) sym.def = LDPK_WEAKDEF; else sym.def = LDPK_DEF; sym.size = 0; sym.comdat_key = nullptr; - int CI = check(Sym.getComdatIndex()); + int CI = Sym.getComdatIndex(); if (CI != -1) { - StringRef C = Obj->getComdatTable()[CI]; + StringRef C = ComdatTable[CI]; sym.comdat_key = strdup(C.str().c_str()); } Index: llvm/tools/llvm-lto2/llvm-lto2.cpp =================================================================== --- llvm/tools/llvm-lto2/llvm-lto2.cpp +++ llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -21,12 +21,12 @@ #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/LTO/LTO.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/Threading.h" using namespace llvm; using namespace lto; -using namespace object; static cl::opt OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "