Index: ELF/InputFiles.h =================================================================== --- ELF/InputFiles.h +++ ELF/InputFiles.h @@ -139,10 +139,11 @@ // Index of MIPS GOT built for this file. llvm::Optional MipsGotIndex; + std::vector Symbols; + protected: InputFile(Kind K, MemoryBufferRef M); std::vector Sections; - std::vector Symbols; private: const Kind FileKind; @@ -255,7 +256,6 @@ StringRef getSectionName(const Elf_Shdr &Sec); bool shouldMerge(const Elf_Shdr &Sec); - Symbol *createSymbol(const Elf_Sym *Sym); // Each ELF symbol contains a section index which the symbol belongs to. // However, because the number of bits dedicated for that is limited, a Index: ELF/InputFiles.cpp =================================================================== --- ELF/InputFiles.cpp +++ ELF/InputFiles.cpp @@ -913,62 +913,91 @@ return CHECK(getObj().getSectionName(&Sec, SectionStringTable), this); } +// Initialize this->Symbols. this->Symbols is a parallel array as +// its corresponding ELF symbol table. template void ObjFile::initializeSymbols() { - this->Symbols.reserve(this->getELFSyms().size()); - for (const Elf_Sym &Sym : this->getELFSyms()) - this->Symbols.push_back(createSymbol(&Sym)); -} - -template Symbol *ObjFile::createSymbol(const Elf_Sym *Sym) { - uint32_t SecIdx = getSectionIndex(*Sym); - if (SecIdx >= this->Sections.size()) - fatal(toString(this) + ": invalid section index: " + Twine(SecIdx)); - - InputSectionBase *Sec = this->Sections[SecIdx]; - uint8_t Binding = Sym->getBinding(); - uint8_t StOther = Sym->st_other; - uint8_t Type = Sym->getType(); - uint64_t Value = Sym->st_value; - uint64_t Size = Sym->st_size; + ArrayRef ESyms = this->getELFSyms(); + this->Symbols.resize(ESyms.size()); - if (Binding == STB_LOCAL) { - if (Sym->getType() == STT_FILE) - SourceFile = CHECK(Sym->getName(this->StringTable), this); - - if (this->StringTable.size() <= Sym->st_name) - fatal(toString(this) + ": invalid symbol name offset"); - - StringRefZ Name = this->StringTable.data() + Sym->st_name; - if (Sym->st_shndx == SHN_UNDEF) - return make(this, Name, Binding, StOther, Type); - return make(this, Name, Binding, StOther, Type, Value, Size, Sec); - } + // Our symbol table may have already been partially initialized + // because of LazyObjFile. + for (size_t I = 0, End = ESyms.size(); I != End; ++I) + if (!this->Symbols[I] && ESyms[I].getBinding() != STB_LOCAL) + this->Symbols[I] = + Symtab->insert(CHECK(ESyms[I].getName(this->StringTable), this)); + + // Fill this->Symbols. A symbol is either local or global. + for (size_t I = 0, End = ESyms.size(); I != End; ++I) { + const Elf_Sym &ESym = ESyms[I]; + + // Read symbol attributes. + uint32_t SecIdx = getSectionIndex(ESym); + if (SecIdx >= this->Sections.size()) + fatal(toString(this) + ": invalid section index: " + Twine(SecIdx)); + + InputSectionBase *Sec = this->Sections[SecIdx]; + uint8_t Binding = ESym.getBinding(); + uint8_t StOther = ESym.st_other; + uint8_t Type = ESym.getType(); + uint64_t Value = ESym.st_value; + uint64_t Size = ESym.st_size; + StringRefZ Name = this->StringTable.data() + ESym.st_name; + + // Handle local symbols. Local symbols are not added to the symbol + // table because they are not visible from other object files. We + // allocate symbol instances and add their pointers to Symbols. + if (Binding == STB_LOCAL) { + if (ESym.getType() == STT_FILE) + SourceFile = CHECK(ESym.getName(this->StringTable), this); + + if (this->StringTable.size() <= ESym.st_name) + fatal(toString(this) + ": invalid symbol name offset"); + + if (ESym.st_shndx == SHN_UNDEF) + this->Symbols[I] = make(this, Name, Binding, StOther, Type); + else + this->Symbols[I] = + make(this, Name, Binding, StOther, Type, Value, Size, Sec); + continue; + } - StringRef Name = CHECK(Sym->getName(this->StringTable), this); + // Handle global undefined symbols. + if (ESym.st_shndx == SHN_UNDEF) { + resolveSymbol(this->Symbols[I], + Undefined{this, Name, Binding, StOther, Type}); + continue; + } - if (Sym->st_shndx == SHN_UNDEF) - return Symtab->addSymbol(Undefined{this, Name, Binding, StOther, Type}); + // Handle global common symbols. + if (ESym.st_shndx == SHN_COMMON) { + if (Value == 0 || Value >= UINT32_MAX) + fatal(toString(this) + ": common symbol '" + StringRef(Name.Data) + + "' has invalid alignment: " + Twine(Value)); + resolveSymbol(this->Symbols[I], CommonSymbol{this, Name, Binding, StOther, + Type, Value, Size}); + continue; + } - if (Sec == &InputSection::Discarded) - return Symtab->addSymbol(Undefined{this, Name, Binding, StOther, Type, - /*DiscardedSecIdx=*/SecIdx}); + // If a defined symbol is in a discarded section, handle it as if it + // were an undefined symbol. Such symbol doesn't comply with the + // standard, but in practice, a .eh_frame often directly refer + // COMDAT member sections, and if a comdat group is discarded, some + // defined symbol in a .eh_frame becomes dangling symbols. + if (Sec == &InputSection::Discarded) { + resolveSymbol(this->Symbols[I], + Undefined{this, Name, Binding, StOther, Type, SecIdx}); + continue; + } - if (Sym->st_shndx == SHN_COMMON) { - if (Value == 0 || Value >= UINT32_MAX) - fatal(toString(this) + ": common symbol '" + Name + - "' has invalid alignment: " + Twine(Value)); - return Symtab->addSymbol( - CommonSymbol{this, Name, Binding, StOther, Type, Value, Size}); - } + // Handle global defined symbols. + if (Binding == STB_GLOBAL || Binding == STB_WEAK || + Binding == STB_GNU_UNIQUE) { + resolveSymbol(this->Symbols[I], Defined{this, Name, Binding, StOther, + Type, Value, Size, Sec}); + continue; + } - switch (Binding) { - default: fatal(toString(this) + ": unexpected binding: " + Twine((int)Binding)); - case STB_GLOBAL: - case STB_WEAK: - case STB_GNU_UNIQUE: - return Symtab->addSymbol( - Defined{this, Name, Binding, StOther, Type, Value, Size, Sec}); } } @@ -1455,10 +1484,16 @@ InputFile *File = createObjectFile(MBRef, ArchiveName, OffsetInArchive); File->GroupId = GroupId; + + // Copy symbol vector so that the new InputFile doesn't have to + // insert the same defined symbols to the symbol table again. + File->Symbols = std::move(Symbols); return File; } template void LazyObjFile::parse() { + using Elf_Sym = typename ELFT::Sym; + // A lazy object file wraps either a bitcode file or an ELF file. if (isBitcode(this->MB)) { std::unique_ptr Obj = @@ -1476,6 +1511,7 @@ return; } + // Find a symbol table. ELFFile Obj = check(ELFFile::create(MB.getBuffer())); ArrayRef Sections = CHECK(Obj.sections(), this); @@ -1483,16 +1519,28 @@ if (Sec.sh_type != SHT_SYMTAB) continue; - typename ELFT::SymRange Syms = CHECK(Obj.symbols(&Sec), this); + // A symbol table is found. + ArrayRef ESyms = CHECK(Obj.symbols(&Sec), this); uint32_t FirstGlobal = Sec.sh_info; - StringRef StringTable = - CHECK(Obj.getStringTableForSymtab(Sec, Sections), this); + StringRef Strtab = CHECK(Obj.getStringTableForSymtab(Sec, Sections), this); + this->Symbols.resize(ESyms.size()); - for (const typename ELFT::Sym &Sym : Syms.slice(FirstGlobal)) { - if (Sym.st_shndx == SHN_UNDEF) + // Get existing symbols or insert placeholder symbols. + for (size_t I = FirstGlobal, End = ESyms.size(); I != End; ++I) + if (ESyms[I].st_shndx != SHN_UNDEF) + this->Symbols[I] = Symtab->insert(CHECK(ESyms[I].getName(Strtab), this)); + + // Replace existing symbols with LazyObject symbols. + // + // resolveSymbol() may trigger this->fetch() if an existing symbol + // is an undefined symbol. If that happens, this LazyObjFile has + // served its purpose, and we can exit from the loop early. + for (Symbol *Sym : this->Symbols) { + if (!Sym) continue; - Symtab->addSymbol( - LazyObject{*this, CHECK(Sym.getName(StringTable), this)}); + resolveSymbol(Sym, LazyObject{*this, Sym->getName()}); + if (AddedToLink) + return; } return; } Index: ELF/SymbolTable.cpp =================================================================== --- ELF/SymbolTable.cpp +++ ELF/SymbolTable.cpp @@ -92,6 +92,7 @@ Symbol *Sym = reinterpret_cast(make()); SymVector.push_back(Sym); + Sym->setName(Name); Sym->SymbolKind = Symbol::PlaceholderKind; Sym->VersionId = Config->DefaultSymbolVersion; Sym->Visibility = STV_DEFAULT;