Index: lld/trunk/ELF/Driver.cpp =================================================================== --- lld/trunk/ELF/Driver.cpp +++ lld/trunk/ELF/Driver.cpp @@ -447,6 +447,7 @@ // all linker scripts have already been parsed. template void LinkerDriver::link(opt::InputArgList &Args) { SymbolTable Symtab; + elf::Symtab::X = &Symtab; std::unique_ptr TI(createTarget()); Target = TI.get(); @@ -468,7 +469,7 @@ if (!Config->Entry.empty()) { StringRef S = Config->Entry; if (S.getAsInteger(0, Config->EntryAddr)) - Config->EntrySym = Symtab.addUndefined(S)->Backref; + Config->EntrySym = Symtab.addUndefined(S); } for (std::unique_ptr &F : Files) Index: lld/trunk/ELF/InputFiles.h =================================================================== --- lld/trunk/ELF/InputFiles.h +++ lld/trunk/ELF/InputFiles.h @@ -89,13 +89,14 @@ uint32_t getSectionIndex(const Elf_Sym &Sym) const; + Elf_Sym_Range getElfSymbols(bool OnlyGlobals); + protected: llvm::object::ELFFile ELFObj; const Elf_Shdr *Symtab = nullptr; ArrayRef SymtabSHNDX; StringRef StringTable; void initStringTable(); - Elf_Sym_Range getElfSymbols(bool OnlyGlobals); }; // .o file. @@ -126,7 +127,7 @@ InputSectionBase *getSection(const Elf_Sym &Sym) const; SymbolBody &getSymbolBody(uint32_t SymbolIndex) const { - return SymbolBodies[SymbolIndex]->repl(); + return *SymbolBodies[SymbolIndex]; } template SymbolBody &getRelocTargetSym(const RelT &Rel) const { @@ -183,9 +184,7 @@ return F->kind() == LazyObjectKind; } - void parse(); - - llvm::MutableArrayRef getLazySymbols() { return LazySymbols; } + template void parse(); private: std::vector getSymbols(); @@ -194,7 +193,6 @@ llvm::BumpPtrAllocator Alloc; llvm::StringSaver Saver{Alloc}; - std::vector LazySymbols; }; // An ArchiveFile object represents a .a file. @@ -202,43 +200,36 @@ public: explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } - void parse(); + template void parse(); // Returns a memory buffer for a given symbol. An empty memory buffer // is returned if we have already returned the same memory buffer. // (So that we don't instantiate same members more than once.) MemoryBufferRef getMember(const Archive::Symbol *Sym); - llvm::MutableArrayRef getLazySymbols() { return LazySymbols; } - private: std::unique_ptr File; - std::vector LazySymbols; llvm::DenseSet Seen; }; class BitcodeFile : public InputFile { public: explicit BitcodeFile(MemoryBufferRef M); - static bool classof(const InputFile *F); + static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } + template void parse(llvm::DenseSet &ComdatGroups); - ArrayRef getSymbols() { return SymbolBodies; } - static bool shouldSkip(const llvm::object::BasicSymbolRef &Sym); + ArrayRef getSymbols() { return Symbols; } + static bool shouldSkip(uint32_t Flags); std::unique_ptr Obj; private: - std::vector SymbolBodies; + std::vector Symbols; llvm::BumpPtrAllocator Alloc; llvm::StringSaver Saver{Alloc}; - SymbolBody * - createSymbolBody(const llvm::DenseSet &KeptComdats, - const llvm::object::IRObjectFile &Obj, - const llvm::object::BasicSymbolRef &Sym); - SymbolBody * - createBody(const llvm::DenseSet &KeptComdats, - const llvm::object::IRObjectFile &Obj, - const llvm::object::BasicSymbolRef &Sym, - const llvm::GlobalValue *GV); + template + Symbol *createSymbol(const llvm::DenseSet &KeptComdats, + const llvm::object::IRObjectFile &Obj, + const llvm::object::BasicSymbolRef &Sym); }; // .so file. @@ -251,7 +242,6 @@ typedef typename ELFT::Versym Elf_Versym; typedef typename ELFT::Verdef Elf_Verdef; - std::vector> SymbolBodies; std::vector Undefs; StringRef SoName; const Elf_Shdr *VersymSec = nullptr; @@ -259,9 +249,6 @@ public: StringRef getSoName() const { return SoName; } - llvm::MutableArrayRef> getSharedSymbols() { - return SymbolBodies; - } const Elf_Shdr *getSection(const Elf_Sym &Sym) const; llvm::ArrayRef getUndefinedSymbols() { return Undefs; } Index: lld/trunk/ELF/InputFiles.cpp =================================================================== --- lld/trunk/ELF/InputFiles.cpp +++ lld/trunk/ELF/InputFiles.cpp @@ -11,6 +11,7 @@ #include "Driver.h" #include "Error.h" #include "InputSection.h" +#include "SymbolTable.h" #include "Symbols.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" @@ -330,11 +331,14 @@ switch (Sym->st_shndx) { case SHN_UNDEF: - return new (Alloc) Undefined(Name, Binding, Sym->st_other, Sym->getType(), - /*IsBitcode*/ false); + return Symtab::X + ->addUndefined(Name, Binding, Sym->st_other, Sym->getType(), this) + ->body(); case SHN_COMMON: - return new (Alloc) DefinedCommon(Name, Sym->st_size, Sym->st_value, Binding, - Sym->st_other, Sym->getType()); + return Symtab::X + ->addCommon(Name, Sym->st_size, Sym->st_value, Binding, Sym->st_other, + Sym->getType(), this) + ->body(); } switch (Binding) { @@ -344,22 +348,19 @@ case STB_WEAK: case STB_GNU_UNIQUE: if (Sec == &InputSection::Discarded) - return new (Alloc) Undefined(Name, Binding, Sym->st_other, Sym->getType(), - /*IsBitcode*/ false); - return new (Alloc) DefinedRegular(Name, *Sym, Sec); + return Symtab::X + ->addUndefined(Name, Binding, Sym->st_other, Sym->getType(), this) + ->body(); + return Symtab::X->addRegular(Name, *Sym, Sec)->body(); } } -void ArchiveFile::parse() { +template void ArchiveFile::parse() { File = check(Archive::create(MB), "failed to parse archive"); - // Allocate a buffer for Lazy objects. - size_t NumSyms = File->getNumberOfSymbols(); - LazySymbols.reserve(NumSyms); - // Read the symbol table to construct Lazy objects. for (const Archive::Symbol &Sym : File->symbols()) - LazySymbols.emplace_back(this, Sym); + Symtab::X->addLazyArchive(this, Sym); } // Returns a buffer pointing to a member file containing a given symbol. @@ -487,8 +488,6 @@ std::vector Verdefs = parseVerdefs(Versym); Elf_Sym_Range Syms = this->getElfSymbols(true); - uint32_t NumSymbols = std::distance(Syms.begin(), Syms.end()); - SymbolBodies.reserve(NumSymbols); for (const Elf_Sym &Sym : Syms) { unsigned VersymIndex = 0; if (Versym) { @@ -507,16 +506,12 @@ if (VersymIndex == 0 || (VersymIndex & VERSYM_HIDDEN)) continue; } - SymbolBodies.emplace_back(this, Name, Sym, Verdefs[VersymIndex]); + Symtab::X->addShared(this, Name, Sym, Verdefs[VersymIndex]); } } BitcodeFile::BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {} -bool BitcodeFile::classof(const InputFile *F) { - return F->kind() == BitcodeKind; -} - static uint8_t getGvVisibility(const GlobalValue *GV) { switch (GV->getVisibility()) { case GlobalValue::DefaultVisibility: @@ -529,21 +524,30 @@ llvm_unreachable("unknown visibility"); } -SymbolBody * -BitcodeFile::createBody(const DenseSet &KeptComdats, - const IRObjectFile &Obj, - const BasicSymbolRef &Sym, - const GlobalValue *GV) { +template +Symbol *BitcodeFile::createSymbol(const DenseSet &KeptComdats, + const IRObjectFile &Obj, + const BasicSymbolRef &Sym) { + const GlobalValue *GV = Obj.getSymbolGV(Sym.getRawDataRefImpl()); + SmallString<64> Name; raw_svector_ostream OS(Name); Sym.printName(OS); StringRef NameRef = Saver.save(StringRef(Name)); - SymbolBody *Body; uint32_t Flags = Sym.getFlags(); bool IsWeak = Flags & BasicSymbolRef::SF_Weak; uint32_t Binding = IsWeak ? STB_WEAK : STB_GLOBAL; + uint8_t Type = STT_NOTYPE; + bool CanOmitFromDynSym = false; + // FIXME: Expose a thread-local flag for module asm symbols. + if (GV) { + if (GV->isThreadLocal()) + Type = STT_TLS; + CanOmitFromDynSym = canBeOmittedFromSymbolTable(GV); + } + uint8_t Visibility; if (GV) Visibility = getGvVisibility(GV); @@ -554,46 +558,28 @@ if (GV) if (const Comdat *C = GV->getComdat()) - if (!KeptComdats.count(C)) { - Body = new (Alloc) Undefined(NameRef, Binding, Visibility, /*Type*/ 0, - /*IsBitcode*/ true); - return Body; - } + if (!KeptComdats.count(C)) + return Symtab::X->addUndefined(NameRef, Binding, Visibility, Type, + this); const Module &M = Obj.getModule(); if (Flags & BasicSymbolRef::SF_Undefined) - return new (Alloc) Undefined(NameRef, Binding, Visibility, /*Type*/ 0, - /*IsBitcode*/ true); + return Symtab::X->addUndefined(NameRef, Binding, Visibility, Type, + this); if (Flags & BasicSymbolRef::SF_Common) { // FIXME: Set SF_Common flag correctly for module asm symbols, and expose // size and alignment. assert(GV); const DataLayout &DL = M.getDataLayout(); uint64_t Size = DL.getTypeAllocSize(GV->getValueType()); - return new (Alloc) DefinedCommon(NameRef, Size, GV->getAlignment(), Binding, - Visibility, /*Type*/ 0); + return Symtab::X->addCommon(NameRef, Size, GV->getAlignment(), + Binding, Visibility, STT_OBJECT, this); } - return new (Alloc) DefinedBitcode(NameRef, IsWeak, Visibility); + return Symtab::X->addBitcode(NameRef, IsWeak, Visibility, Type, + CanOmitFromDynSym, this); } -SymbolBody * -BitcodeFile::createSymbolBody(const DenseSet &KeptComdats, - const IRObjectFile &Obj, - const BasicSymbolRef &Sym) { - const GlobalValue *GV = Obj.getSymbolGV(Sym.getRawDataRefImpl()); - SymbolBody *Body = createBody(KeptComdats, Obj, Sym, GV); - - // FIXME: Expose a thread-local flag for module asm symbols. - if (GV) { - if (GV->isThreadLocal()) - Body->Type = STT_TLS; - Body->CanOmitFromDynSym = canBeOmittedFromSymbolTable(GV); - } - return Body; -} - -bool BitcodeFile::shouldSkip(const BasicSymbolRef &Sym) { - uint32_t Flags = Sym.getFlags(); +bool BitcodeFile::shouldSkip(uint32_t Flags) { if (!(Flags & BasicSymbolRef::SF_Global)) return true; if (Flags & BasicSymbolRef::SF_FormatSpecific) @@ -601,6 +587,7 @@ return false; } +template void BitcodeFile::parse(DenseSet &ComdatGroups) { Obj = check(IRObjectFile::create(MB, Driver->Context)); const Module &M = Obj->getModule(); @@ -613,8 +600,8 @@ } for (const BasicSymbolRef &Sym : Obj->symbols()) - if (!shouldSkip(Sym)) - SymbolBodies.push_back(createSymbolBody(KeptComdats, *Obj, Sym)); + if (!shouldSkip(Sym.getFlags())) + Symbols.push_back(createSymbol(KeptComdats, *Obj, Sym)); } template @@ -675,9 +662,10 @@ return createELFFile(MB); } +template void LazyObjectFile::parse() { for (StringRef Sym : getSymbols()) - LazySymbols.emplace_back(Sym, this->MB); + Symtab::X->addLazyObject(Sym, this->MB); } template std::vector LazyObjectFile::getElfSymbols() { @@ -707,9 +695,10 @@ check(IRObjectFile::create(this->MB, Context)); std::vector V; for (const BasicSymbolRef &Sym : Obj->symbols()) { - if (BitcodeFile::shouldSkip(Sym)) + uint32_t Flags = Sym.getFlags(); + if (BitcodeFile::shouldSkip(Flags)) continue; - if (Sym.getFlags() & BasicSymbolRef::SF_Undefined) + if (Flags & BasicSymbolRef::SF_Undefined) continue; SmallString<64> Name; raw_svector_ostream OS(Name); @@ -737,6 +726,25 @@ return getElfSymbols(); } +template void ArchiveFile::parse(); +template void ArchiveFile::parse(); +template void ArchiveFile::parse(); +template void ArchiveFile::parse(); + +template void +BitcodeFile::parse(llvm::DenseSet &ComdatGroups); +template void +BitcodeFile::parse(llvm::DenseSet &ComdatGroups); +template void +BitcodeFile::parse(llvm::DenseSet &ComdatGroups); +template void +BitcodeFile::parse(llvm::DenseSet &ComdatGroups); + +template void LazyObjectFile::parse(); +template void LazyObjectFile::parse(); +template void LazyObjectFile::parse(); +template void LazyObjectFile::parse(); + template class elf::ELFFileBase; template class elf::ELFFileBase; template class elf::ELFFileBase; Index: lld/trunk/ELF/LTO.cpp =================================================================== --- lld/trunk/ELF/LTO.cpp +++ lld/trunk/ELF/LTO.cpp @@ -76,14 +76,14 @@ } static bool shouldInternalize(const SmallPtrSet &Used, - SymbolBody &B, GlobalValue *GV) { - if (B.Backref->IsUsedInRegularObj) + Symbol *S, GlobalValue *GV) { + if (S->IsUsedInRegularObj) return false; if (Used.count(GV)) return false; - return !B.Backref->includeInDynsym(); + return !S->includeInDynsym(); } BitcodeCompiler::BitcodeCompiler() @@ -94,7 +94,7 @@ std::unique_ptr Obj = std::move(F.Obj); std::vector Keep; unsigned BodyIndex = 0; - ArrayRef Bodies = F.getSymbols(); + ArrayRef Syms = F.getSymbols(); Module &M = Obj->getModule(); if (M.getDataLayoutStr().empty()) @@ -106,19 +106,30 @@ SmallPtrSet Used; collectUsedGlobalVariables(M, Used, /* CompilerUsed */ false); + // This function is called if we know that the combined LTO object will + // provide a definition of a symbol. It undefines the symbol so that the + // definition in the combined LTO object will replace it when parsed. + auto Undefine = [](Symbol *S) { + replaceBody(S, S->body()->getName(), STV_DEFAULT, 0); + }; + for (const BasicSymbolRef &Sym : Obj->symbols()) { + uint32_t Flags = Sym.getFlags(); GlobalValue *GV = Obj->getSymbolGV(Sym.getRawDataRefImpl()); - // Ignore module asm symbols. - if (!GV) - continue; - if (GV->hasAppendingLinkage()) { + if (GV && GV->hasAppendingLinkage()) Keep.push_back(GV); + if (BitcodeFile::shouldSkip(Flags)) continue; - } - if (BitcodeFile::shouldSkip(Sym)) + Symbol *S = Syms[BodyIndex++]; + if (Flags & BasicSymbolRef::SF_Undefined) + continue; + if (!GV) { + // Module asm symbol. + Undefine(S); continue; - SymbolBody *B = Bodies[BodyIndex++]; - if (!B || &B->repl() != B || !isa(B)) + } + auto *B = dyn_cast(S->body()); + if (!B || B->File != &F) continue; switch (GV->getLinkage()) { default: @@ -136,8 +147,10 @@ // we imported the symbols and satisfied undefined references // to it. We can't just change linkage here because otherwise // the IRMover will just rename the symbol. - if (shouldInternalize(Used, *B, GV)) + if (shouldInternalize(Used, S, GV)) InternalizedSyms.insert(GV->getName()); + else + Undefine(S); Keep.push_back(GV); } Index: lld/trunk/ELF/MarkLive.cpp =================================================================== --- lld/trunk/ELF/MarkLive.cpp +++ lld/trunk/ELF/MarkLive.cpp @@ -137,15 +137,14 @@ Q.push_back(S); }; - auto MarkSymbol = [&](SymbolBody *Sym) { - if (Sym) - if (auto *D = dyn_cast>(Sym)) - Enqueue({D->Section, D->Value}); + auto MarkSymbol = [&](const SymbolBody *Sym) { + if (auto *D = dyn_cast_or_null>(Sym)) + Enqueue({D->Section, D->Value}); }; // Add GC root symbols. if (Config->EntrySym) - MarkSymbol(Config->EntrySym->Body); + MarkSymbol(Config->EntrySym->body()); MarkSymbol(Symtab->find(Config->Init)); MarkSymbol(Symtab->find(Config->Fini)); for (StringRef S : Config->Undefined) @@ -155,7 +154,7 @@ // file can interrupt other ELF file's symbols at runtime. for (const Symbol *S : Symtab->getSymbols()) if (S->includeInDynsym()) - MarkSymbol(S->Body); + MarkSymbol(S->body()); // Preserve special sections and those which are specified in linker // script KEEP command. Index: lld/trunk/ELF/OutputSections.cpp =================================================================== --- lld/trunk/ELF/OutputSections.cpp +++ lld/trunk/ELF/OutputSections.cpp @@ -144,9 +144,9 @@ } template bool GotSection::addDynTlsEntry(SymbolBody &Sym) { - if (Sym.hasGlobalDynIndex()) + if (Sym.symbol()->GlobalDynIndex != -1U) return false; - Sym.GlobalDynIndex = Entries.size(); + Sym.symbol()->GlobalDynIndex = Entries.size(); // Global Dynamic TLS entries take two GOT slots. Entries.push_back(&Sym); Entries.push_back(nullptr); @@ -186,13 +186,13 @@ template typename GotSection::uintX_t GotSection::getGlobalDynAddr(const SymbolBody &B) const { - return this->getVA() + B.GlobalDynIndex * sizeof(uintX_t); + return this->getVA() + B.symbol()->GlobalDynIndex * sizeof(uintX_t); } template typename GotSection::uintX_t GotSection::getGlobalDynOffset(const SymbolBody &B) const { - return B.GlobalDynIndex * sizeof(uintX_t); + return B.symbol()->GlobalDynIndex * sizeof(uintX_t); } template @@ -1371,7 +1371,7 @@ } static uint8_t getSymbolBinding(SymbolBody *Body) { - Symbol *S = Body->Backref; + Symbol *S = Body->symbol(); uint8_t Visibility = S->Visibility; if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED) return STB_LOCAL; @@ -1472,7 +1472,7 @@ ESym->setBindingAndType(getSymbolBinding(Body), Type); ESym->st_size = Size; ESym->st_name = StrOff; - ESym->setVisibility(Body->Backref->Visibility); + ESym->setVisibility(Body->symbol()->Visibility); ESym->st_value = Body->getVA(); if (const OutputSectionBase *OutSec = getOutputSection(Body)) Index: lld/trunk/ELF/SymbolTable.h =================================================================== --- lld/trunk/ELF/SymbolTable.h +++ lld/trunk/ELF/SymbolTable.h @@ -31,7 +31,9 @@ // conflicts. For example, obviously, a defined symbol is better than // an undefined symbol. Or, if there's a conflict between a lazy and a // undefined, it'll read an archive member to read a real definition -// to replace the lazy symbol. The logic is implemented in resolve(). +// to replace the lazy symbol. The logic is implemented in the +// add*() functions, which are called by input files as they are parsed. There +// is one add* function per symbol type. template class SymbolTable { typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::uint uintX_t; @@ -50,14 +52,32 @@ return SharedFiles; } - SymbolBody *addUndefined(StringRef Name); DefinedRegular *addAbsolute(StringRef Name, uint8_t Visibility = llvm::ELF::STV_HIDDEN); - SymbolBody *addSynthetic(StringRef Name, OutputSectionBase &Section, - uintX_t Value); DefinedRegular *addIgnored(StringRef Name, uint8_t Visibility = llvm::ELF::STV_HIDDEN); + Symbol *addUndefined(StringRef Name); + Symbol *addUndefined(StringRef Name, uint8_t Binding, uint8_t StOther, + uint8_t Type, InputFile *File); + + Symbol *addRegular(StringRef Name, const Elf_Sym &Sym, + InputSectionBase *Section); + Symbol *addRegular(StringRef Name, uint8_t Binding, uint8_t StOther); + Symbol *addSynthetic(StringRef N, OutputSectionBase &Section, + uintX_t Value); + void addShared(SharedFile *F, StringRef Name, const Elf_Sym &Sym, + const typename ELFT::Verdef *Verdef); + + void addLazyArchive(ArchiveFile *F, const llvm::object::Archive::Symbol S); + void addLazyObject(StringRef Name, MemoryBufferRef MBRef); + Symbol *addBitcode(StringRef Name, bool IsWeak, uint8_t StOther, uint8_t Type, + bool CanOmitFromDynSym, BitcodeFile *File); + + Symbol *addCommon(StringRef N, uint64_t Size, uint64_t Alignment, + uint8_t Binding, uint8_t StOther, uint8_t Type, + InputFile *File); + void scanUndefinedFlags(); void scanShlibUndefined(); void scanDynamicList(); @@ -67,11 +87,13 @@ InputFile *findFile(SymbolBody *B); private: - Symbol *insert(SymbolBody *New); - void addLazy(Lazy *New); - void addMemberFile(SymbolBody *Undef, Lazy *L); - void resolve(SymbolBody *Body); - std::string conflictMsg(SymbolBody *Old, SymbolBody *New); + std::pair insert(StringRef Name); + std::pair insert(StringRef Name, uint8_t Type, + uint8_t Visibility, bool CanOmitFromDynSym, + bool IsUsedInRegularObj, InputFile *File); + + std::string conflictMsg(SymbolBody *Existing, InputFile *NewFile); + void reportDuplicate(SymbolBody *Existing, InputFile *NewFile); // The order the global symbols are in is not defined. We can use an arbitrary // order, but it has to be reproducible. That is true even when cross linking. @@ -102,6 +124,9 @@ std::unique_ptr Lto; }; +template struct Symtab { static SymbolTable *X; }; +template SymbolTable *Symtab::X; + } // namespace elf } // namespace lld Index: lld/trunk/ELF/SymbolTable.cpp =================================================================== --- lld/trunk/ELF/SymbolTable.cpp +++ lld/trunk/ELF/SymbolTable.cpp @@ -64,18 +64,14 @@ // .a file if (auto *F = dyn_cast(FileP)) { ArchiveFiles.emplace_back(cast(File.release())); - F->parse(); - for (Lazy &Sym : F->getLazySymbols()) - addLazy(&Sym); + F->parse(); return; } // Lazy object file if (auto *F = dyn_cast(FileP)) { LazyObjectFiles.emplace_back(cast(File.release())); - F->parse(); - for (Lazy &Sym : F->getLazySymbols()) - addLazy(&Sym); + F->parse(); return; } @@ -91,18 +87,13 @@ SharedFiles.emplace_back(cast>(File.release())); F->parseRest(); - for (SharedSymbol &B : F->getSharedSymbols()) - resolve(&B); return; } // LLVM bitcode file if (auto *F = dyn_cast(FileP)) { BitcodeFiles.emplace_back(cast(File.release())); - F->parse(ComdatGroups); - for (SymbolBody *B : F->getSymbols()) - if (B) - resolve(B); + F->parse(ComdatGroups); return; } @@ -110,8 +101,6 @@ auto *F = cast>(FileP); ObjectFiles.emplace_back(cast>(File.release())); F->parse(ComdatGroups); - for (SymbolBody *B : F->getNonLocalSymbols()) - resolve(B); } // This function is where all the optimizations of link-time @@ -137,41 +126,15 @@ llvm::DenseSet DummyGroups; Obj->parse(DummyGroups); - for (SymbolBody *Body : Obj->getNonLocalSymbols()) { - Symbol *Sym = insert(Body); - if (!Sym->Body->isUndefined() && Body->isUndefined()) - continue; - Sym->Body = Body; - } ObjectFiles.emplace_back(Obj); } } -// Add an undefined symbol. -template -SymbolBody *SymbolTable::addUndefined(StringRef Name) { - auto *Sym = new (Alloc) Undefined(Name, STB_GLOBAL, STV_DEFAULT, /*Type*/ 0, - /*IsBitcode*/ false); - resolve(Sym); - return Sym; -} - template DefinedRegular *SymbolTable::addAbsolute(StringRef Name, uint8_t Visibility) { - // Pass nullptr because absolute symbols have no corresponding input sections. - auto *Sym = new (Alloc) DefinedRegular(Name, STB_GLOBAL, Visibility); - resolve(Sym); - return Sym; -} - -template -SymbolBody *SymbolTable::addSynthetic(StringRef Name, - OutputSectionBase &Sec, - uintX_t Val) { - auto *Sym = new (Alloc) DefinedSynthetic(Name, Val, Sec); - resolve(Sym); - return Sym; + return cast>( + addRegular(Name, STB_GLOBAL, Visibility)->body()); } // Add Name as an "ignored" symbol. An ignored symbol is a regular @@ -191,84 +154,47 @@ if (!B) return; StringSaver Saver(Alloc); - Symbol *Sym = B->Backref; - Symbol *Real = addUndefined(Saver.save("__real_" + Name))->Backref; - Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name))->Backref; - Real->Body = Sym->Body; - Sym->Body = Wrap->Body; + Symbol *Sym = B->symbol(); + Symbol *Real = addUndefined(Saver.save("__real_" + Name)); + Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name)); + // We rename symbols by replacing the old symbol's SymbolBody with the new + // symbol's SymbolBody. This causes all SymbolBody pointers referring to the + // old symbol to instead refer to the new symbol. + memcpy(Real->Body.buffer, Sym->Body.buffer, sizeof(Sym->Body)); + memcpy(Sym->Body.buffer, Wrap->Body.buffer, sizeof(Wrap->Body)); } // Returns a file from which symbol B was created. // If B does not belong to any file, returns a nullptr. // This function is slow, but it's okay as it is used only for error messages. template InputFile *SymbolTable::findFile(SymbolBody *B) { + // If this symbol has a definition, follow pointers in the symbol to its + // defining file. + if (auto *R = dyn_cast>(B)) + if (auto *S = R->Section) + return S->getFile(); + if (auto *SS = dyn_cast>(B)) + return SS->File; + if (auto *BC = dyn_cast(B)) + return BC->File; + // If not, we might be able to find it by searching symbol tables of files. + // This code is generally only used for undefined symbols. Note that we can't + // rely exclusively on a file search because we may find what was originally + // an undefined symbol that was later replaced with a defined symbol, and we + // want to return the file that defined the symbol. for (const std::unique_ptr> &F : ObjectFiles) { ArrayRef Syms = F->getSymbols(); if (std::find(Syms.begin(), Syms.end(), B) != Syms.end()) return F.get(); } for (const std::unique_ptr &F : BitcodeFiles) { - ArrayRef Syms = F->getSymbols(); - if (std::find(Syms.begin(), Syms.end(), B) != Syms.end()) + ArrayRef Syms = F->getSymbols(); + if (std::find(Syms.begin(), Syms.end(), B->symbol()) != Syms.end()) return F.get(); } return nullptr; } -// Construct a string in the form of "Sym in File1 and File2". -// Used to construct an error message. -template -std::string SymbolTable::conflictMsg(SymbolBody *Old, SymbolBody *New) { - InputFile *F1 = findFile(Old); - InputFile *F2 = findFile(New); - StringRef Sym = Old->getName(); - return demangle(Sym) + " in " + getFilename(F1) + " and " + getFilename(F2); -} - -// This function resolves conflicts if there's an existing symbol with -// the same name. Decisions are made based on symbol type. -template void SymbolTable::resolve(SymbolBody *New) { - Symbol *Sym = insert(New); - if (Sym->Body == New) - return; - - SymbolBody *Existing = Sym->Body; - - if (auto *L = dyn_cast(Existing)) { - Sym->Binding = New->Binding; - if (New->isUndefined()) { - addMemberFile(New, L); - return; - } - // Found a definition for something also in an archive. - // Ignore the archive definition. - Sym->Body = New; - return; - } - - if (New->isTls() != Existing->isTls()) { - error("TLS attribute mismatch for symbol: " + conflictMsg(Existing, New)); - return; - } - - // compare() returns -1, 0, or 1 if the lhs symbol is less preferable, - // equivalent (conflicting), or more preferable, respectively. - int Comp = Existing->compare(New); - if (Comp == 0) { - std::string S = "duplicate symbol: " + conflictMsg(Existing, New); - if (Config->AllowMultipleDefinition) - warning(S); - else - error(S); - return; - } - if (Comp < 0) { - Sym->Body = New; - if (!New->isShared()) - Sym->Binding = New->Binding; - } -} - static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) { if (VA == STV_DEFAULT) return VB; @@ -277,25 +203,15 @@ return std::min(VA, VB); } -static bool shouldExport(SymbolBody *B) { - if (Config->Shared || Config->ExportDynamic) { - // Export most symbols except for those that do not need to be exported. - return !B->CanOmitFromDynSym; - } - // Make sure we preempt DSO symbols with default visibility. - return B->isShared() && B->getVisibility() == STV_DEFAULT; -} - // Find an existing symbol or create and insert a new one. -template Symbol *SymbolTable::insert(SymbolBody *New) { - StringRef Name = New->getName(); +template +std::pair SymbolTable::insert(StringRef Name) { unsigned NumSyms = SymVector.size(); auto P = Symtab.insert(std::make_pair(Name, NumSyms)); Symbol *Sym; if (P.second) { Sym = new (Alloc) Symbol; - Sym->Body = New; - Sym->Binding = New->isShared() ? (uint8_t)STB_GLOBAL : New->Binding; + Sym->Binding = STB_WEAK; Sym->Visibility = STV_DEFAULT; Sym->IsUsedInRegularObj = false; Sym->ExportDynamic = false; @@ -304,69 +220,299 @@ } else { Sym = SymVector[P.first->second]; } - New->Backref = Sym; + return {Sym, P.second}; +} + +// Find an existing symbol or create and insert a new one, then apply the given +// attributes. +template +std::pair +SymbolTable::insert(StringRef Name, uint8_t Type, uint8_t Visibility, + bool CanOmitFromDynSym, bool IsUsedInRegularObj, + InputFile *File) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + + // Merge in the new symbol's visibility. + S->Visibility = getMinVisibility(S->Visibility, Visibility); + if (!CanOmitFromDynSym && (Config->Shared || Config->ExportDynamic)) + S->ExportDynamic = true; + if (IsUsedInRegularObj) + S->IsUsedInRegularObj = true; + if (!WasInserted && ((Type == STT_TLS) != S->body()->isTls())) + error("TLS attribute mismatch for symbol: " + + conflictMsg(S->body(), File)); + + return {S, WasInserted}; +} + +// Construct a string in the form of "Sym in File1 and File2". +// Used to construct an error message. +template +std::string SymbolTable::conflictMsg(SymbolBody *Existing, + InputFile *NewFile) { + StringRef Sym = Existing->getName(); + return demangle(Sym) + " in " + getFilename(findFile(Existing)) + " and " + + getFilename(NewFile); +} + +template Symbol *SymbolTable::addUndefined(StringRef Name) { + return addUndefined(Name, STB_GLOBAL, STV_DEFAULT, /*Type*/ 0, + /*File*/ nullptr); +} + +template +Symbol *SymbolTable::addUndefined(StringRef Name, uint8_t Binding, + uint8_t StOther, uint8_t Type, + InputFile *File) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = + insert(Name, Type, StOther & 3, /*CanOmitFromDynSym*/ false, + /*IsUsedInRegularObj*/ !File || !isa(File), File); + if (WasInserted) { + S->Binding = Binding; + replaceBody(S, Name, StOther, Type); + return S; + } + if (Binding != STB_WEAK && + (S->body()->isShared() || S->body()->isLazy())) + S->Binding = Binding; + if (auto *L = dyn_cast(S->body())) { + // An undefined weak will not fetch archive members, but we have to remember + // its type. See also comment in addLazyArchive. + if (S->isWeak()) + L->Type = Type; + else if (auto F = L->getFile()) + addFile(std::move(F)); + } + return S; +} + +// We have a new defined symbol with the specified binding. Return 1 if the new +// symbol should win, -1 if the new symbol should lose, or 0 if both symbols are +// strong defined symbols. +static int compareDefined(Symbol *S, bool WasInserted, uint8_t Binding) { + if (WasInserted) + return 1; + SymbolBody *Body = S->body(); + if (Body->isLazy() || Body->isUndefined() || Body->isShared()) + return 1; + if (Binding == STB_WEAK) + return -1; + if (S->isWeak()) + return 1; + return 0; +} + +// We have a new non-common defined symbol with the specified binding. Return 1 +// if the new symbol should win, -1 if the new symbol should lose, or 0 if there +// is a conflict. If the new symbol wins, also update the binding. +static int compareDefinedNonCommon(Symbol *S, bool WasInserted, uint8_t Binding) { + if (int Cmp = compareDefined(S, WasInserted, Binding)) { + if (Cmp > 0) + S->Binding = Binding; + return Cmp; + } + if (isa(S->body())) { + // Non-common symbols take precedence over common symbols. + if (Config->WarnCommon) + warning("common " + S->body()->getName() + " is overridden"); + return 1; + } + return 0; +} + +template +Symbol *SymbolTable::addCommon(StringRef N, uint64_t Size, + uint64_t Alignment, uint8_t Binding, + uint8_t StOther, uint8_t Type, + InputFile *File) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = + insert(N, Type, StOther & 3, /*CanOmitFromDynSym*/ false, + /*IsUsedInRegularObj*/ true, File); + int Cmp = compareDefined(S, WasInserted, Binding); + if (Cmp > 0) { + S->Binding = Binding; + replaceBody(S, N, Size, Alignment, StOther, Type); + } else if (Cmp == 0) { + auto *C = dyn_cast(S->body()); + if (!C) { + // Non-common symbols take precedence over common symbols. + if (Config->WarnCommon) + warning("common " + S->body()->getName() + " is overridden"); + return S; + } + + if (Config->WarnCommon) + warning("multiple common of " + S->body()->getName()); + + C->Size = std::max(C->Size, Size); + C->Alignment = std::max(C->Alignment, Alignment); + } + return S; +} + +template +void SymbolTable::reportDuplicate(SymbolBody *Existing, + InputFile *NewFile) { + std::string Msg = "duplicate symbol: " + conflictMsg(Existing, NewFile); + if (Config->AllowMultipleDefinition) + warning(Msg); + else + error(Msg); +} + +template +Symbol *SymbolTable::addRegular(StringRef Name, const Elf_Sym &Sym, + InputSectionBase *Section) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = + insert(Name, Sym.getType(), Sym.getVisibility(), + /*CanOmitFromDynSym*/ false, /*IsUsedInRegularObj*/ true, + Section ? Section->getFile() : nullptr); + int Cmp = compareDefinedNonCommon(S, WasInserted, Sym.getBinding()); + if (Cmp > 0) + replaceBody>(S, Name, Sym, Section); + else if (Cmp == 0) + reportDuplicate(S->body(), Section->getFile()); + return S; +} + +template +Symbol *SymbolTable::addRegular(StringRef Name, uint8_t Binding, + uint8_t StOther) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = + insert(Name, STT_NOTYPE, StOther & 3, /*CanOmitFromDynSym*/ false, + /*IsUsedInRegularObj*/ true, nullptr); + int Cmp = compareDefinedNonCommon(S, WasInserted, Binding); + if (Cmp > 0) + replaceBody>(S, Name, StOther); + else if (Cmp == 0) + reportDuplicate(S->body(), nullptr); + return S; +} + +template +Symbol *SymbolTable::addSynthetic(StringRef N, + OutputSectionBase &Section, + uintX_t Value) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = + insert(N, STT_NOTYPE, STV_HIDDEN, /*CanOmitFromDynSym*/ false, + /*IsUsedInRegularObj*/ true, nullptr); + int Cmp = compareDefinedNonCommon(S, WasInserted, STB_GLOBAL); + if (Cmp > 0) + replaceBody>(S, N, Value, Section); + else if (Cmp == 0) + reportDuplicate(S->body(), nullptr); + return S; +} + +template +void SymbolTable::addShared(SharedFile *F, StringRef Name, + const Elf_Sym &Sym, + const typename ELFT::Verdef *Verdef) { + // DSO symbols do not affect visibility in the output, so we pass STV_DEFAULT + // as the visibility, which will leave the visibility in the symbol table + // unchanged. + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = + insert(Name, Sym.getType(), STV_DEFAULT, /*CanOmitFromDynSym*/ true, + /*IsUsedInRegularObj*/ false, F); + // Make sure we preempt DSO symbols with default visibility. + if (Sym.getVisibility() == STV_DEFAULT) + S->ExportDynamic = true; + if (WasInserted || isa(S->body())) + replaceBody>(S, F, Name, Sym, Verdef); +} - // Merge in the new symbol's visibility. DSO symbols do not affect visibility - // in the output. - if (!New->isShared()) - Sym->Visibility = getMinVisibility(Sym->Visibility, New->getVisibility()); - Sym->ExportDynamic = Sym->ExportDynamic || shouldExport(New); - SymbolBody::Kind K = New->kind(); - if (K == SymbolBody::DefinedRegularKind || - K == SymbolBody::DefinedCommonKind || - K == SymbolBody::DefinedSyntheticKind || - (K == SymbolBody::UndefinedKind && !New->IsUndefinedBitcode)) - Sym->IsUsedInRegularObj = true; - return Sym; +template +Symbol *SymbolTable::addBitcode(StringRef Name, bool IsWeak, + uint8_t StOther, uint8_t Type, + bool CanOmitFromDynSym, BitcodeFile *F) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name, Type, StOther & 3, CanOmitFromDynSym, + /*IsUsedInRegularObj*/ false, F); + int Cmp = + compareDefinedNonCommon(S, WasInserted, IsWeak ? STB_WEAK : STB_GLOBAL); + if (Cmp > 0) + replaceBody(S, Name, StOther, Type, F); + else if (Cmp == 0) + reportDuplicate(S->body(), F); + return S; } template SymbolBody *SymbolTable::find(StringRef Name) { auto It = Symtab.find(Name); if (It == Symtab.end()) return nullptr; - return SymVector[It->second]->Body; + return SymVector[It->second]->body(); } -template void SymbolTable::addLazy(Lazy *L) { - Symbol *Sym = insert(L); - SymbolBody *Cur = Sym->Body; - if (Cur == L) - return; - if (Cur->isUndefined()) { - Sym->Body = L; - addMemberFile(Cur, L); - } +template +void SymbolTable::addLazyArchive( + ArchiveFile *F, const llvm::object::Archive::Symbol Sym) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Sym.getName()); + if (WasInserted) { + replaceBody(S, F, Sym, STT_NOTYPE); + return; + } + if (!S->body()->isUndefined()) + return; + + // Weak undefined symbols should not fetch members from archives. If we were + // to keep old symbol we would not know that an archive member was available + // if a strong undefined symbol shows up afterwards in the link. If a strong + // undefined symbol never shows up, this lazy symbol will get to the end of + // the link and must be treated as the weak undefined one. We already marked + // this symbol as used when we added it to the symbol table, but we also need + // to preserve its type. FIXME: Move the Type field to Symbol. + if (S->isWeak()) { + replaceBody(S, F, Sym, S->body()->Type); + return; + } + MemoryBufferRef MBRef = F->getMember(&Sym); + if (!MBRef.getBuffer().empty()) + addFile(createObjectFile(MBRef, F->getName())); } template -void SymbolTable::addMemberFile(SymbolBody *Undef, Lazy *L) { - // Weak undefined symbols should not fetch members from archives. - // If we were to keep old symbol we would not know that an archive member was - // available if a strong undefined symbol shows up afterwards in the link. - // If a strong undefined symbol never shows up, this lazy symbol will - // get to the end of the link and must be treated as the weak undefined one. - // We already marked this symbol as used when we added it to the symbol table, - // but we also need to preserve its binding and type. - if (Undef->isWeak()) { - // FIXME: Consider moving these members to Symbol. - L->Type = Undef->Type; - return; - } - - // Fetch a member file that has the definition for L. - // getMember returns nullptr if the member was already read from the library. - if (std::unique_ptr File = L->getFile()) - addFile(std::move(File)); +void SymbolTable::addLazyObject(StringRef Name, MemoryBufferRef MBRef) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (WasInserted) { + replaceBody(S, Name, MBRef, STT_NOTYPE); + return; + } + if (!S->body()->isUndefined()) + return; + + // See comment for addLazyArchive above. + if (S->isWeak()) + replaceBody(S, Name, MBRef, S->body()->Type); + else + addFile(createObjectFile(MBRef)); } // Process undefined (-u) flags by loading lazy symbols named by those flags. -template -void SymbolTable::scanUndefinedFlags() { +template void SymbolTable::scanUndefinedFlags() { for (StringRef S : Config->Undefined) - if (SymbolBody *Sym = find(S)) - if (auto *L = dyn_cast(Sym)) - if (std::unique_ptr File = L->getFile()) - addFile(std::move(File)); + if (auto *L = dyn_cast_or_null(find(S))) + if (std::unique_ptr File = L->getFile()) + addFile(std::move(File)); } // This function takes care of the case in which shared libraries depend on @@ -381,7 +527,7 @@ for (StringRef U : File->getUndefinedSymbols()) if (SymbolBody *Sym = find(U)) if (Sym->isDefined()) - Sym->Backref->ExportDynamic = true; + Sym->symbol()->ExportDynamic = true; } // This function process the dynamic list option by marking all the symbols @@ -389,7 +535,7 @@ template void SymbolTable::scanDynamicList() { for (StringRef S : Config->DynamicList) if (SymbolBody *B = find(S)) - B->Backref->ExportDynamic = true; + B->symbol()->ExportDynamic = true; } // This function processes the --version-script option by marking all global @@ -398,7 +544,7 @@ template void SymbolTable::scanVersionScript() { for (StringRef S : Config->VersionScriptGlobals) if (SymbolBody *B = find(S)) - B->Backref->VersionScriptGlobal = true; + B->symbol()->VersionScriptGlobal = true; } template class elf::SymbolTable; Index: lld/trunk/ELF/Symbols.h =================================================================== --- lld/trunk/ELF/Symbols.h +++ lld/trunk/ELF/Symbols.h @@ -20,11 +20,13 @@ #include "lld/Core/LLVM.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/AlignOf.h" namespace lld { namespace elf { class ArchiveFile; +class BitcodeFile; class InputFile; class SymbolBody; template class ObjectFile; @@ -37,45 +39,7 @@ // it returns the unmodified string. std::string demangle(StringRef Name); -// A real symbol object, SymbolBody, is usually accessed indirectly -// through a Symbol. There's always one Symbol for each symbol name. -// The resolver updates SymbolBody pointers as it resolves symbols. -// Symbol also holds computed properties of symbol names. -struct Symbol { - SymbolBody *Body; - - // Symbol binding. This is on the Symbol to track changes during resolution. - // In particular: - // An undefined weak is still weak when it resolves to a shared library. - // An undefined weak will not fetch archive members, but we have to remember - // it is weak. - uint8_t Binding; - - // Symbol visibility. This is the computed minimum visibility of all - // observed non-DSO symbols. - unsigned Visibility : 2; - - // True if the symbol was used for linking and thus need to be added to the - // output file's symbol table. This is true for all symbols except for - // unreferenced DSO symbols and bitcode symbols that are unreferenced except - // by other bitcode objects. - unsigned IsUsedInRegularObj : 1; - - // If this flag is true and the symbol has protected or default visibility, it - // will appear in .dynsym. This flag is set by interposable DSO symbols in - // executables, by most symbols in DSOs and executables built with - // --export-dynamic, and by dynamic lists. - unsigned ExportDynamic : 1; - - // This flag acts as an additional filter on the dynamic symbol list. It is - // set if there is no version script, or if the symbol appears in the global - // section of the version script. - unsigned VersionScriptGlobal : 1; - - bool includeInDynsym() const; - - bool isWeak() const { return Binding == llvm::ELF::STB_WEAK; } -}; +struct Symbol; // The base class for real symbol classes. class SymbolBody { @@ -95,9 +59,13 @@ LazyObjectKind, }; + Symbol *symbol(); + const Symbol *symbol() const { + return const_cast(this)->symbol(); + } + Kind kind() const { return static_cast(SymbolKind); } - bool isWeak() const { return Binding == llvm::ELF::STB_WEAK; } bool isUndefined() const { return SymbolKind == UndefinedKind; } bool isDefined() const { return SymbolKind <= DefinedLast; } bool isCommon() const { return SymbolKind == DefinedCommonKind; } @@ -105,7 +73,7 @@ return SymbolKind == LazyArchiveKind || SymbolKind == LazyObjectKind; } bool isShared() const { return SymbolKind == SharedKind; } - bool isLocal() const { return Binding == llvm::ELF::STB_LOCAL; } + bool isLocal() const { return IsLocal; } bool isPreemptible() const; // Returns the symbol name. @@ -121,12 +89,10 @@ uint8_t getVisibility() const { return StOther & 0x3; } unsigned DynsymIndex = 0; - uint32_t GlobalDynIndex = -1; uint32_t GotIndex = -1; uint32_t GotPltIndex = -1; uint32_t PltIndex = -1; uint32_t ThunkIndex = -1; - bool hasGlobalDynIndex() { return GlobalDynIndex != uint32_t(-1); } bool isInGot() const { return GotIndex != -1U; } bool isInPlt() const { return PltIndex != -1U; } bool hasThunk() const { return ThunkIndex != -1U; } @@ -142,46 +108,23 @@ template typename ELFT::uint getThunkVA() const; template typename ELFT::uint getSize() const; - // A SymbolBody has a backreference to a Symbol. Originally they are - // doubly-linked. A backreference will never change. But the pointer - // in the Symbol may be mutated by the resolver. If you have a - // pointer P to a SymbolBody and are not sure whether the resolver - // has chosen the object among other objects having the same name, - // you can access P->Backref->Body to get the resolver's result. - SymbolBody &repl() { return Backref ? *Backref->Body : *this; } - - // Decides which symbol should "win" in the symbol table, this or - // the Other. Returns 1 if this wins, -1 if the Other wins, or 0 if - // they are duplicate (conflicting) symbols. - int compare(SymbolBody *Other); - protected: - SymbolBody(Kind K, StringRef Name, uint8_t Binding, uint8_t StOther, - uint8_t Type); + SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type); SymbolBody(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type); const unsigned SymbolKind : 8; public: - // True if this symbol can be omitted from the symbol table if nothing else - // requires it to be there. Right now this is only used for linkonce_odr in - // LTO, but we could add the feature to ELF. It would be similar to - // MachO's .weak_def_can_be_hidden. - unsigned CanOmitFromDynSym : 1; - // True if the linker has to generate a copy relocation for this shared // symbol or if the symbol should point to its plt entry. unsigned NeedsCopyOrPltAddr : 1; - // True if the symbol is undefined and comes from a bitcode file. We need to - // keep track of this because undefined symbols only prevent internalization - // of bitcode symbols if they did not come from a bitcode file. - unsigned IsUndefinedBitcode : 1; + // True if this is a local symbol. + unsigned IsLocal : 1; // The following fields have the same meaning as the ELF symbol attributes. uint8_t Type; // symbol type - uint8_t Binding; // symbol binding uint8_t StOther; // st_other field value bool isSection() const { return Type == llvm::ELF::STT_SECTION; } @@ -191,8 +134,6 @@ bool isObject() const { return Type == llvm::ELF::STT_OBJECT; } bool isFile() const { return Type == llvm::ELF::STT_FILE; } - Symbol *Backref = nullptr; - protected: struct Str { const char *S; @@ -207,8 +148,7 @@ // The base class for any defined symbols. class Defined : public SymbolBody { public: - Defined(Kind K, StringRef Name, uint8_t Binding, uint8_t StOther, - uint8_t Type); + Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type); Defined(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type); static bool classof(const SymbolBody *S) { return S->isDefined(); } }; @@ -216,14 +156,16 @@ // The defined symbol in LLVM bitcode files. class DefinedBitcode : public Defined { public: - DefinedBitcode(StringRef Name, bool IsWeak, uint8_t StOther); + DefinedBitcode(StringRef Name, uint8_t StOther, uint8_t Type, BitcodeFile *F); static bool classof(const SymbolBody *S); + + BitcodeFile *File; }; class DefinedCommon : public Defined { public: - DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, uint8_t Binding, - uint8_t StOther, uint8_t Type); + DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, uint8_t StOther, + uint8_t Type); static bool classof(const SymbolBody *S) { return S->kind() == SymbolBody::DefinedCommonKind; @@ -247,8 +189,8 @@ public: DefinedRegular(StringRef Name, const Elf_Sym &Sym, InputSectionBase *Section) - : Defined(SymbolBody::DefinedRegularKind, Name, Sym.getBinding(), - Sym.st_other, Sym.getType()), + : Defined(SymbolBody::DefinedRegularKind, Name, Sym.st_other, + Sym.getType()), Value(Sym.st_value), Size(Sym.st_size), Section(Section ? Section->Repl : NullInputSection) {} @@ -260,8 +202,8 @@ assert(isLocal()); } - DefinedRegular(StringRef Name, uint8_t Binding, uint8_t StOther) - : Defined(SymbolBody::DefinedRegularKind, Name, Binding, StOther, + DefinedRegular(StringRef Name, uint8_t StOther) + : Defined(SymbolBody::DefinedRegularKind, Name, StOther, llvm::ELF::STT_NOTYPE), Value(0), Size(0), Section(NullInputSection) {} @@ -311,8 +253,7 @@ class Undefined : public SymbolBody { public: - Undefined(StringRef Name, uint8_t Binding, uint8_t StOther, uint8_t Type, - bool IsBitcode); + Undefined(StringRef Name, uint8_t StOther, uint8_t Type); Undefined(uint32_t NameOffset, uint8_t StOther, uint8_t Type); static bool classof(const SymbolBody *S) { @@ -332,8 +273,7 @@ SharedSymbol(SharedFile *F, StringRef Name, const Elf_Sym &Sym, const Elf_Verdef *Verdef) - : Defined(SymbolBody::SharedKind, Name, Sym.getBinding(), Sym.st_other, - Sym.getType()), + : Defined(SymbolBody::SharedKind, Name, Sym.st_other, Sym.getType()), File(F), Sym(Sym), Verdef(Verdef) { // IFuncs defined in DSOs are treated as functions by the static linker. if (isGnuIFunc()) @@ -364,9 +304,8 @@ // the same name, it will ask the Lazy to load a file. class Lazy : public SymbolBody { public: - Lazy(SymbolBody::Kind K, StringRef Name) - : SymbolBody(K, Name, llvm::ELF::STB_GLOBAL, llvm::ELF::STV_DEFAULT, - /* Type */ 0) {} + Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type) + : SymbolBody(K, Name, llvm::ELF::STV_DEFAULT, Type) {} static bool classof(const SymbolBody *S) { return S->isLazy(); } @@ -378,8 +317,9 @@ // LazyArchive symbols represents symbols in archive files. class LazyArchive : public Lazy { public: - LazyArchive(ArchiveFile *F, const llvm::object::Archive::Symbol S) - : Lazy(LazyArchiveKind, S.getName()), File(F), Sym(S) {} + LazyArchive(ArchiveFile *F, const llvm::object::Archive::Symbol S, + uint8_t Type) + : Lazy(LazyArchiveKind, S.getName(), Type), File(F), Sym(S) {} static bool classof(const SymbolBody *S) { return S->kind() == LazyArchiveKind; @@ -396,8 +336,8 @@ // --start-lib and --end-lib options. class LazyObject : public Lazy { public: - LazyObject(StringRef Name, MemoryBufferRef M) - : Lazy(LazyObjectKind, Name), MBRef(M) {} + LazyObject(StringRef Name, MemoryBufferRef M, uint8_t Type) + : Lazy(LazyObjectKind, Name, Type), MBRef(M) {} static bool classof(const SymbolBody *S) { return S->kind() == LazyObjectKind; @@ -424,16 +364,8 @@ static DefinedRegular *End; static DefinedRegular *End2; - // The content for _gp symbol for MIPS target. - static SymbolBody *MipsGp; - - static SymbolBody *MipsLocalGp; + // The content for _gp_disp symbol for MIPS target. static SymbolBody *MipsGpDisp; - - // __rel_iplt_start/__rel_iplt_end for signaling - // where R_[*]_IRELATIVE relocations do live. - static SymbolBody *RelaIpltStart; - static SymbolBody *RelaIpltEnd; }; template DefinedRegular *ElfSym::Etext; @@ -442,11 +374,76 @@ template DefinedRegular *ElfSym::Edata2; template DefinedRegular *ElfSym::End; template DefinedRegular *ElfSym::End2; -template SymbolBody *ElfSym::MipsGp; -template SymbolBody *ElfSym::MipsLocalGp; template SymbolBody *ElfSym::MipsGpDisp; -template SymbolBody *ElfSym::RelaIpltStart; -template SymbolBody *ElfSym::RelaIpltEnd; + +// A real symbol object, SymbolBody, is usually stored within a Symbol. There's +// always one Symbol for each symbol name. The resolver updates the SymbolBody +// stored in the Body field of this object as it resolves symbols. Symbol also +// holds computed properties of symbol names. +struct Symbol { + uint32_t GlobalDynIndex = -1; + + // Symbol binding. This is on the Symbol to track changes during resolution. + // In particular: + // An undefined weak is still weak when it resolves to a shared library. + // An undefined weak will not fetch archive members, but we have to remember + // it is weak. + uint8_t Binding; + + // Symbol visibility. This is the computed minimum visibility of all + // observed non-DSO symbols. + unsigned Visibility : 2; + + // True if the symbol was used for linking and thus need to be added to the + // output file's symbol table. This is true for all symbols except for + // unreferenced DSO symbols and bitcode symbols that are unreferenced except + // by other bitcode objects. + unsigned IsUsedInRegularObj : 1; + + // If this flag is true and the symbol has protected or default visibility, it + // will appear in .dynsym. This flag is set by interposable DSO symbols in + // executables, by most symbols in DSOs and executables built with + // --export-dynamic, and by dynamic lists. + unsigned ExportDynamic : 1; + + // This flag acts as an additional filter on the dynamic symbol list. It is + // set if there is no version script, or if the symbol appears in the global + // section of the version script. + unsigned VersionScriptGlobal : 1; + + bool includeInDynsym() const; + bool isWeak() const { return Binding == llvm::ELF::STB_WEAK; } + + // This field is used to store the Symbol's SymbolBody. This instantiation of + // AlignedCharArrayUnion gives us a struct with a char array field that is + // large and aligned enough to store any derived class of SymbolBody. We + // assume that the size and alignment of ELF64LE symbols is sufficient for any + // ELFT, and we verify this with the static_asserts in replaceBody. + llvm::AlignedCharArrayUnion< + DefinedBitcode, DefinedCommon, DefinedRegular, + DefinedSynthetic, Undefined, + SharedSymbol, LazyArchive, LazyObject> + Body; + + SymbolBody *body() { return reinterpret_cast(Body.buffer); } + const SymbolBody *body() const { return const_cast(this)->body(); } +}; + +template +void replaceBody(Symbol *S, ArgT &&... Arg) { + static_assert(sizeof(T) <= sizeof(S->Body), "Body too small"); + static_assert(alignof(T) <= alignof(decltype(S->Body)), + "Body not aligned enough"); + static_assert(static_cast(static_cast(nullptr)) == nullptr, + "Not a SymbolBody"); + new (S->Body.buffer) T(std::forward(Arg)...); +} + +inline Symbol *SymbolBody::symbol() { + assert(!isLocal()); + return reinterpret_cast(reinterpret_cast(this) - + offsetof(Symbol, Body)); +} } // namespace elf } // namespace lld Index: lld/trunk/ELF/Symbols.cpp =================================================================== --- lld/trunk/ELF/Symbols.cpp +++ lld/trunk/ELF/Symbols.cpp @@ -79,7 +79,7 @@ return 0; case SymbolBody::LazyArchiveKind: case SymbolBody::LazyObjectKind: - assert(Body.Backref->IsUsedInRegularObj && "lazy symbol reached writer"); + assert(Body.symbol()->IsUsedInRegularObj && "lazy symbol reached writer"); return 0; case SymbolBody::DefinedBitcodeKind: llvm_unreachable("should have been replaced"); @@ -89,22 +89,19 @@ SymbolBody::SymbolBody(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type) - : SymbolKind(K), Type(Type), Binding(STB_LOCAL), StOther(StOther), + : SymbolKind(K), IsLocal(true), Type(Type), StOther(StOther), NameOffset(NameOffset) { init(); } -SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t Binding, uint8_t StOther, - uint8_t Type) - : SymbolKind(K), Type(Type), Binding(Binding), StOther(StOther), +SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type) + : SymbolKind(K), IsLocal(false), Type(Type), StOther(StOther), Name({Name.data(), Name.size()}) { - assert(!isLocal()); init(); } void SymbolBody::init() { NeedsCopyOrPltAddr = false; - CanOmitFromDynSym = false; } // Returns true if a symbol can be replaced at load-time by a symbol @@ -122,14 +119,14 @@ return false; // Only symbols that appear in dynsym can be preempted. - if (!Backref->includeInDynsym()) + if (!symbol()->includeInDynsym()) return false; // Normally only default visibility symbols can be preempted, but -Bsymbolic // means that not even they can be preempted. if (Config->Bsymbolic || (Config->BsymbolicFunctions && isFunc())) return !isDefined(); - return Backref->Visibility == STV_DEFAULT; + return symbol()->Visibility == STV_DEFAULT; } template @@ -177,79 +174,35 @@ return 0; } -// Returns 1, 0 or -1 if this symbol should take precedence -// over the Other, tie or lose, respectively. -int SymbolBody::compare(SymbolBody *Other) { - assert(!isLazy() && !Other->isLazy()); - std::tuple L(isDefined(), !isShared(), !isWeak()); - std::tuple R(Other->isDefined(), !Other->isShared(), - !Other->isWeak()); - - // Compare the two by symbol type. - if (L > R) - return -Other->compare(this); - if (L != R) - return -1; - if (!isDefined() || isShared() || isWeak()) - return 1; - - // If both are equal in terms of symbol type, then at least - // one of them must be a common symbol. Otherwise, they conflict. - auto *A = dyn_cast(this); - auto *B = dyn_cast(Other); - if (!A && !B) - return 0; - - // If both are common, the larger one is chosen. - if (A && B) { - if (Config->WarnCommon) - warning("multiple common of " + A->getName()); - A->Alignment = B->Alignment = std::max(A->Alignment, B->Alignment); - return A->Size < B->Size ? -1 : 1; - } - - // Non-common symbols takes precedence over common symbols. - if (Config->WarnCommon) - warning("common " + this->getName() + " is overridden"); - return A ? -1 : 1; -} - -Defined::Defined(Kind K, StringRef Name, uint8_t Binding, uint8_t StOther, - uint8_t Type) - : SymbolBody(K, Name, Binding, StOther, Type) {} +Defined::Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type) + : SymbolBody(K, Name, StOther, Type) {} Defined::Defined(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type) : SymbolBody(K, NameOffset, StOther, Type) {} -DefinedBitcode::DefinedBitcode(StringRef Name, bool IsWeak, uint8_t StOther) - : Defined(DefinedBitcodeKind, Name, IsWeak ? STB_WEAK : STB_GLOBAL, - StOther, 0 /* Type */) {} +DefinedBitcode::DefinedBitcode(StringRef Name, uint8_t StOther, uint8_t Type, + BitcodeFile *F) + : Defined(DefinedBitcodeKind, Name, StOther, Type), File(F) {} bool DefinedBitcode::classof(const SymbolBody *S) { return S->kind() == DefinedBitcodeKind; } -Undefined::Undefined(StringRef Name, uint8_t Binding, uint8_t StOther, - uint8_t Type, bool IsBitcode) - : SymbolBody(SymbolBody::UndefinedKind, Name, Binding, StOther, Type) { - this->IsUndefinedBitcode = IsBitcode; -} +Undefined::Undefined(StringRef Name, uint8_t StOther, uint8_t Type) + : SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {} Undefined::Undefined(uint32_t NameOffset, uint8_t StOther, uint8_t Type) - : SymbolBody(SymbolBody::UndefinedKind, NameOffset, StOther, Type) { - this->IsUndefinedBitcode = false; -} + : SymbolBody(SymbolBody::UndefinedKind, NameOffset, StOther, Type) {} template DefinedSynthetic::DefinedSynthetic(StringRef N, uintX_t Value, OutputSectionBase &Section) - : Defined(SymbolBody::DefinedSyntheticKind, N, STB_GLOBAL, STV_HIDDEN, - 0 /* Type */), + : Defined(SymbolBody::DefinedSyntheticKind, N, STV_HIDDEN, 0 /* Type */), Value(Value), Section(Section) {} DefinedCommon::DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, - uint8_t Binding, uint8_t StOther, uint8_t Type) - : Defined(SymbolBody::DefinedCommonKind, N, Binding, StOther, Type), + uint8_t StOther, uint8_t Type) + : Defined(SymbolBody::DefinedCommonKind, N, StOther, Type), Alignment(Alignment), Size(Size) {} std::unique_ptr Lazy::getFile() { @@ -301,8 +254,8 @@ bool Symbol::includeInDynsym() const { if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED) return false; - return (ExportDynamic && VersionScriptGlobal) || Body->isShared() || - (Body->isUndefined() && Config->Shared); + return (ExportDynamic && VersionScriptGlobal) || body()->isShared() || + (body()->isUndefined() && Config->Shared); } template uint32_t SymbolBody::template getVA(uint32_t) const; Index: lld/trunk/ELF/Writer.cpp =================================================================== --- lld/trunk/ELF/Writer.cpp +++ lld/trunk/ELF/Writer.cpp @@ -428,13 +428,8 @@ // True if non-preemptable symbol always has the same value regardless of where // the DSO is loaded. template static bool isAbsolute(const SymbolBody &Body) { - Symbol *Sym = Body.Backref; - if (Body.isUndefined()) { - if (!Sym) - return false; // undefined local. That is the dummy symbol 0. - if (Sym->isWeak()) - return true; // always 0 - } + if (Body.isUndefined()) + return !Body.isLocal() && Body.symbol()->isWeak(); if (const auto *DR = dyn_cast>(&Body)) return DR->Section == nullptr; // Absolute symbol. return false; @@ -771,7 +766,7 @@ if (Config->Relocatable) return; if (Config->Shared) - if (Sym->Backref->Visibility == STV_DEFAULT) + if (Sym->symbol()->Visibility == STV_DEFAULT) return; } @@ -1022,15 +1017,19 @@ Out::Bss->updateAlign(Align); uintX_t Shndx = SS->Sym.st_shndx; uintX_t Value = SS->Sym.st_value; - // Look through the DSO's dynamic symbol for aliases and create a dynamic - // symbol for each one. This causes the copy relocation to correctly interpose - // any aliases. - for (SharedSymbol &S : SS->File->getSharedSymbols()) { - if (S.Sym.st_shndx != Shndx || S.Sym.st_value != Value) + // Look through the DSO's dynamic symbol table for aliases and create a + // dynamic symbol for each one. This causes the copy relocation to correctly + // interpose any aliases. + for (const Elf_Sym &S : SS->File->getElfSymbols(true)) { + if (S.st_shndx != Shndx || S.st_value != Value) + continue; + auto *Alias = dyn_cast_or_null>( + Symtab.find(check(S.getName(SS->File->getStringTable())))); + if (!Alias) continue; - S.OffsetInBss = Off; - S.NeedsCopyOrPltAddr = true; - S.Backref->IsUsedInRegularObj = true; + Alias->OffsetInBss = Off; + Alias->NeedsCopyOrPltAddr = true; + Alias->symbol()->IsUsedInRegularObj = true; } Out::RelaDyn->addReloc( {Target->CopyRel, Out::Bss, SS->OffsetInBss, false, SS, 0}); @@ -1067,9 +1066,9 @@ } template -static SymbolBody * -addOptionalSynthetic(SymbolTable &Table, StringRef Name, - OutputSectionBase &Sec, typename ELFT::uint Val) { +static Symbol *addOptionalSynthetic(SymbolTable &Table, StringRef Name, + OutputSectionBase &Sec, + typename ELFT::uint Val) { if (!Table.find(Name)) return nullptr; return Table.addSynthetic(Name, Sec, Val); @@ -1085,16 +1084,15 @@ if (isOutputDynamic() || !Out::RelaPlt) return; StringRef S = Config->Rela ? "__rela_iplt_start" : "__rel_iplt_start"; - ElfSym::RelaIpltStart = - addOptionalSynthetic(Symtab, S, *Out::RelaPlt, 0); + addOptionalSynthetic(Symtab, S, *Out::RelaPlt, 0); S = Config->Rela ? "__rela_iplt_end" : "__rel_iplt_end"; - ElfSym::RelaIpltEnd = addOptionalSynthetic( - Symtab, S, *Out::RelaPlt, DefinedSynthetic::SectionEnd); + addOptionalSynthetic(Symtab, S, *Out::RelaPlt, + DefinedSynthetic::SectionEnd); } template static bool includeInSymtab(const SymbolBody &B) { - if (!B.Backref->IsUsedInRegularObj) + if (!B.symbol()->IsUsedInRegularObj) return false; if (auto *D = dyn_cast>(&B)) { @@ -1191,19 +1189,19 @@ // so that it points to an absolute address which is relative to GOT. // See "Global Data Symbols" in Chapter 6 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - ElfSym::MipsGp = - Symtab.addSynthetic("_gp", *Out::Got, MipsGPOffset); + Symtab.addSynthetic("_gp", *Out::Got, MipsGPOffset); // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between // start of function and 'gp' pointer into GOT. ElfSym::MipsGpDisp = - addOptionalSynthetic(Symtab, "_gp_disp", *Out::Got, MipsGPOffset); + addOptionalSynthetic(Symtab, "_gp_disp", *Out::Got, MipsGPOffset) + ->body(); // The __gnu_local_gp is a magic symbol equal to the current value of 'gp' // pointer. This symbol is used in the code generated by .cpload pseudo-op // in case of using -mno-shared option. // https://sourceware.org/ml/binutils/2004-12/msg00094.html - ElfSym::MipsLocalGp = addOptionalSynthetic( - Symtab, "__gnu_local_gp", *Out::Got, MipsGPOffset); + addOptionalSynthetic(Symtab, "__gnu_local_gp", *Out::Got, + MipsGPOffset); } // In the assembly for 32 bit x86 the _GLOBAL_OFFSET_TABLE_ symbol @@ -1363,7 +1361,7 @@ // synthesized ones. Visit all symbols to give the finishing touches. std::vector CommonSymbols; for (Symbol *S : Symtab.getSymbols()) { - SymbolBody *Body = S->Body; + SymbolBody *Body = S->body(); // Set "used" bit for --as-needed. if (S->IsUsedInRegularObj && !S->isWeak()) @@ -1781,7 +1779,7 @@ template static typename ELFT::uint getEntryAddr() { if (Symbol *S = Config->EntrySym) - return S->Body->getVA(); + return S->body()->getVA(); if (Config->EntryAddr != uint64_t(-1)) return Config->EntryAddr; return 0; Index: lld/trunk/docs/NewLLD.rst =================================================================== --- lld/trunk/docs/NewLLD.rst +++ lld/trunk/docs/NewLLD.rst @@ -186,23 +186,24 @@ * Symbol - Symbol is a pointer to a SymbolBody. There's only one Symbol for - each unique symbol name (this uniqueness is guaranteed by the symbol table). - Because SymbolBodies are created for each file independently, - there can be many SymbolBodies for the same name. - Thus, the relationship between Symbols and SymbolBodies is 1:N. - You can think of Symbols as handles for SymbolBodies. - - The resolver keeps the Symbol's pointer to always point to the "best" SymbolBody. - Pointer mutation is the resolve operation of this linker. - - SymbolBodies have pointers to their Symbols. - That means you can always find the best SymbolBody from - any SymbolBody by following pointers twice. - This structure makes it very easy and cheap to find replacements for symbols. - For example, if you have an Undefined SymbolBody, you can find a Defined - SymbolBody for that symbol just by going to its Symbol and then to SymbolBody, - assuming the resolver have successfully resolved all undefined symbols. + A Symbol is a container for a SymbolBody. There's only one Symbol for each + unique symbol name (this uniqueness is guaranteed by the symbol table). + Each global symbol has only one SymbolBody at any one time, which is + the SymbolBody stored within a memory region of the Symbol large enough + to store any SymbolBody. + + As the resolver reads symbols from input files, it replaces the Symbol's + SymbolBody with the "best" SymbolBody for its symbol name by constructing + the new SymbolBody in place on top of the existing SymbolBody. For example, + if the resolver is given a defined symbol, and the SymbolBody with its name + is undefined, it will construct a Defined SymbolBody over the Undefined + SymbolBody. + + This means that each SymbolBody pointer always points to the best SymbolBody, + and it is possible to get from a SymbolBody to a Symbol, or vice versa, + by adding or subtracting a fixed offset. This memory layout helps reduce + the cache miss rate through high locality and a small number of required + pointer indirections. * SymbolTable Index: lld/trunk/test/ELF/lto/common2.ll =================================================================== --- lld/trunk/test/ELF/lto/common2.ll +++ lld/trunk/test/ELF/lto/common2.ll @@ -18,7 +18,7 @@ ; SHARED-NEXT: Value: 0x2000 ; SHARED-NEXT: Size: 1 ; SHARED-NEXT: Binding: Global -; SHARED-NEXT: Type: None +; SHARED-NEXT: Type: Object ; SHARED-NEXT: Other: 0 ; SHARED-NEXT: Section: .bss ; SHARED-NEXT: } Index: lld/trunk/test/ELF/relocation-copy-alias.s =================================================================== --- lld/trunk/test/ELF/relocation-copy-alias.s +++ lld/trunk/test/ELF/relocation-copy-alias.s @@ -53,7 +53,7 @@ // CHECK: Name: a2 // CHECK-NEXT: Value: [[A]] // CHECK-NEXT: Size: 1 -// CHECK-NEXT: Binding: Global +// CHECK-NEXT: Binding: Weak // CHECK-NEXT: Type: Object (0x1) // CHECK-NEXT: Other: 0 // CHECK-NEXT: Section: .bss @@ -61,7 +61,7 @@ // CHECK: Name: b3 // CHECK-NEXT: Value: [[B]] // CHECK-NEXT: Size: 1 -// CHECK-NEXT: Binding: Global (0x1) +// CHECK-NEXT: Binding: Weak // CHECK-NEXT: Type: Object (0x1) // CHECK-NEXT: Other: 0 // CHECK-NEXT: Section: .bss