Index: llvm/tools/llvm-objcopy/CMakeLists.txt =================================================================== --- llvm/tools/llvm-objcopy/CMakeLists.txt +++ llvm/tools/llvm-objcopy/CMakeLists.txt @@ -26,6 +26,7 @@ MachO/MachOObjcopy.cpp MachO/MachOReader.cpp MachO/MachOWriter.cpp + MachO/Object.cpp DEPENDS ObjcopyOptsTableGen StripOptsTableGen Index: llvm/tools/llvm-objcopy/MachO/MachOReader.h =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOReader.h +++ llvm/tools/llvm-objcopy/MachO/MachOReader.h @@ -30,7 +30,7 @@ void readHeader(Object &O) const; void readLoadCommands(Object &O) const; void readSymbolTable(Object &O) const; - void readStringTable(Object &O) const; + void setSymbolInRelocationInfo(Object &O) const; void readRebaseInfo(Object &O) const; void readBindInfo(Object &O) const; void readWeakBindInfo(Object &O) const; Index: llvm/tools/llvm-objcopy/MachO/MachOReader.cpp =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -97,8 +97,16 @@ S.Relocations.reserve(S.NReloc); for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()), RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl()); - RI != RE; ++RI) - S.Relocations.push_back(MachOObj.getRelocation(RI->getRawDataRefImpl())); + RI != RE; ++RI) { + RelocationInfo R; + R.Symbol = nullptr; // We'll fill this field later. + R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); + R.Scattered = + reinterpret_cast(&R.Info) + ->r_scattered; + S.Relocations.push_back(R); + } + assert(S.NReloc == S.Relocations.size() && "Incorrect number of relocations"); } @@ -157,35 +165,43 @@ } } -template NListEntry constructNameList(const nlist_t &nlist) { - NListEntry NL; - NL.n_strx = nlist.n_strx; - NL.n_type = nlist.n_type; - NL.n_sect = nlist.n_sect; - NL.n_desc = nlist.n_desc; - NL.n_value = nlist.n_value; - return NL; +template +SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { + assert(nlist.n_strx < StrTable.size() && + "n_strx exceeds the size of the string table"); + SymbolEntry SE; + SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); + SE.n_type = nlist.n_type; + SE.n_sect = nlist.n_sect; + SE.n_desc = nlist.n_desc; + SE.n_value = nlist.n_value; + return SE; } void MachOReader::readSymbolTable(Object &O) const { + StringRef StrTable = MachOObj.getStringTableData(); for (auto Symbol : MachOObj.symbols()) { - NListEntry NLE = - MachOObj.is64Bit() - ? constructNameList( - MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl())) - : constructNameList( - MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl())); - O.SymTable.NameList.push_back(NLE); + SymbolEntry SE = + (MachOObj.is64Bit() + ? constructSymbolEntry( + StrTable, + MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl())) + : constructSymbolEntry( + StrTable, + MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl()))); + + O.SymTable.Symbols.push_back(llvm::make_unique(SE)); } } -void MachOReader::readStringTable(Object &O) const { - StringRef Data = MachOObj.getStringTableData(); - SmallVector Strs; - Data.split(Strs, '\0'); - O.StrTable.Strings.reserve(Strs.size()); - for (auto S : Strs) - O.StrTable.Strings.push_back(S.str()); +void MachOReader::setSymbolInRelocationInfo(Object &O) const { + for (auto &LC : O.LoadCommands) + for (auto &Sec : LC.Sections) + for (auto &Reloc : Sec.Relocations) + if (!Reloc.Scattered) { + auto *Info = reinterpret_cast(&Reloc.Info); + Reloc.Symbol = O.SymTable.getSymbolByIndex(Info->r_symbolnum); + } } void MachOReader::readRebaseInfo(Object &O) const { @@ -213,7 +229,7 @@ readHeader(*Obj); readLoadCommands(*Obj); readSymbolTable(*Obj); - readStringTable(*Obj); + setSymbolInRelocationInfo(*Obj); readRebaseInfo(*Obj); readBindInfo(*Obj); readWeakBindInfo(*Obj); Index: llvm/tools/llvm-objcopy/MachO/MachOWriter.h =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOWriter.h +++ llvm/tools/llvm-objcopy/MachO/MachOWriter.h @@ -23,6 +23,7 @@ bool Is64Bit; bool IsLittleEndian; Buffer &B; + StringTableBuilder StrTableBuilder{StringTableBuilder::MachO}; size_t headerSize() const; size_t loadCommandsSize() const; @@ -31,6 +32,8 @@ void updateDySymTab(MachO::macho_load_command &MLC); void updateSizeOfCmds(); + void updateSymbolIndexes(); + void constructStringTable(); Error layout(); void writeHeader(); Index: llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -26,18 +26,10 @@ size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; } size_t MachOWriter::symTableSize() const { - return O.SymTable.NameList.size() * + return O.SymTable.Symbols.size() * (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist)); } -size_t MachOWriter::strTableSize() const { - size_t S = 0; - for (const auto &Str : O.StrTable.Strings) - S += Str.size(); - S += (O.StrTable.Strings.empty() ? 0 : O.StrTable.Strings.size() - 1); - return S; -} - size_t MachOWriter::totalSize() const { // Going from tail to head and looking for an appropriate "anchor" to // calculate the total size assuming that all the offsets are either valid @@ -49,12 +41,12 @@ O.LoadCommands[*O.SymTabCommandIndex] .MachOLoadCommand.symtab_command_data; if (SymTabCommand.symoff) { - assert((SymTabCommand.nsyms == O.SymTable.NameList.size()) && + assert((SymTabCommand.nsyms == O.SymTable.Symbols.size()) && "Incorrect number of symbols"); Ends.push_back(SymTabCommand.symoff + symTableSize()); } if (SymTabCommand.stroff) { - assert((SymTabCommand.strsize == strTableSize()) && + assert((SymTabCommand.strsize == StrTableBuilder.getSize()) && "Incorrect string table size"); Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize); } @@ -128,6 +120,14 @@ memcpy(B.getBufferStart(), &Header, HeaderSize); } +void MachOWriter::updateSymbolIndexes() { + uint32_t Index = 0; + for (auto &Symbol : O.SymTable.Symbols) { + Symbol->Index = Index; + Index++; + } +} + void MachOWriter::writeLoadCommands() { uint8_t *Begin = B.getBufferStart() + headerSize(); for (const auto &LC : O.LoadCommands) { @@ -220,24 +220,32 @@ memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(), Sec.Content.size()); for (size_t Index = 0; Index < Sec.Relocations.size(); ++Index) { - MachO::any_relocation_info R = Sec.Relocations[Index]; + auto RelocInfo = Sec.Relocations[Index]; + if (!RelocInfo.Scattered) { + auto *Info = + reinterpret_cast(&RelocInfo.Info); + Info->r_symbolnum = RelocInfo.Symbol->Index; + } + if (IsLittleEndian != sys::IsLittleEndianHost) - MachO::swapStruct(R); + MachO::swapStruct( + reinterpret_cast(RelocInfo.Info)); memcpy(B.getBufferStart() + Sec.RelOff + Index * sizeof(MachO::any_relocation_info), - &R, sizeof(R)); + &RelocInfo.Info, sizeof(RelocInfo.Info)); } } } template -void writeNListEntry(const NListEntry &NLE, bool IsLittleEndian, char *&Out) { +void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out, + uint32_t Nstrx) { NListType ListEntry; - ListEntry.n_strx = NLE.n_strx; - ListEntry.n_type = NLE.n_type; - ListEntry.n_sect = NLE.n_sect; - ListEntry.n_desc = NLE.n_desc; - ListEntry.n_value = NLE.n_value; + ListEntry.n_strx = Nstrx; + ListEntry.n_type = SE.n_type; + ListEntry.n_sect = SE.n_sect; + ListEntry.n_desc = SE.n_desc; + ListEntry.n_value = SE.n_value; if (IsLittleEndian != sys::IsLittleEndianHost) MachO::swapStruct(ListEntry); @@ -251,15 +259,9 @@ const MachO::symtab_command &SymTabCommand = O.LoadCommands[*O.SymTabCommandIndex] .MachOLoadCommand.symtab_command_data; - assert((SymTabCommand.nsyms == O.SymTable.NameList.size()) && - "Incorrect number of symbols"); - char *Out = (char *)B.getBufferStart() + SymTabCommand.symoff; - for (auto NLE : O.SymTable.NameList) { - if (Is64Bit) - writeNListEntry(NLE, IsLittleEndian, Out); - else - writeNListEntry(NLE, IsLittleEndian, Out); - } + + uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff; + StrTableBuilder.write(StrTable); } void MachOWriter::writeStringTable() { @@ -268,17 +270,17 @@ const MachO::symtab_command &SymTabCommand = O.LoadCommands[*O.SymTabCommandIndex] .MachOLoadCommand.symtab_command_data; - char *Out = (char *)B.getBufferStart() + SymTabCommand.stroff; - assert((SymTabCommand.strsize == strTableSize()) && - "Incorrect string table size"); - for (size_t Index = 0; Index < O.StrTable.Strings.size(); ++Index) { - memcpy(Out, O.StrTable.Strings[Index].data(), - O.StrTable.Strings[Index].size()); - Out += O.StrTable.Strings[Index].size(); - if (Index + 1 != O.StrTable.Strings.size()) { - memcpy(Out, "\0", 1); - Out += 1; - } + + char *SymTable = (char *)B.getBufferStart() + SymTabCommand.symoff; + for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end(); + Iter != End; Iter++) { + SymbolEntry *Sym = Iter->get(); + auto Nstrx = StrTableBuilder.getOffset(Sym->Name); + + if (Is64Bit) + writeNListEntry(*Sym, IsLittleEndian, SymTable, Nstrx); + else + writeNListEntry(*Sym, IsLittleEndian, SymTable, Nstrx); } } @@ -420,10 +422,10 @@ // are already sorted by the those types. void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) { uint32_t NumLocalSymbols = 0; - auto Iter = O.SymTable.NameList.begin(); - auto End = O.SymTable.NameList.end(); + auto Iter = O.SymTable.Symbols.begin(); + auto End = O.SymTable.Symbols.end(); for (; Iter != End; Iter++) { - if (Iter->n_type & (MachO::N_EXT | MachO::N_PEXT)) + if ((*Iter)->n_type & (MachO::N_EXT | MachO::N_PEXT)) break; NumLocalSymbols++; @@ -431,7 +433,7 @@ uint32_t NumExtDefSymbols = 0; for (; Iter != End; Iter++) { - if ((Iter->n_type & MachO::N_TYPE) == MachO::N_UNDF) + if (((*Iter)->n_type & MachO::N_TYPE) == MachO::N_UNDF) break; NumExtDefSymbols++; @@ -443,7 +445,7 @@ MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; MLC.dysymtab_command_data.nundefsym = - O.SymTable.NameList.size() - (NumLocalSymbols + NumExtDefSymbols); + O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); } // Recomputes and updates offset and size fields in load commands and sections @@ -512,8 +514,9 @@ auto cmd = MLC.load_command_data.cmd; switch (cmd) { case MachO::LC_SYMTAB: + MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); + MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); MLC.symtab_command_data.symoff = Offset; - MLC.symtab_command_data.nsyms = O.SymTable.NameList.size(); Offset += NListSize * MLC.symtab_command_data.nsyms; MLC.symtab_command_data.stroff = Offset; Offset += MLC.symtab_command_data.strsize; @@ -554,8 +557,15 @@ return Error::success(); } +void MachOWriter::constructStringTable() { + for (std::unique_ptr &Sym : O.SymTable.Symbols) + StrTableBuilder.add(Sym->Name); + StrTableBuilder.finalize(); +} + Error MachOWriter::finalize() { updateSizeOfCmds(); + constructStringTable(); if (auto E = layout()) return E; @@ -568,6 +578,7 @@ return E; memset(B.getBufferStart(), 0, totalSize()); writeHeader(); + updateSymbolIndexes(); writeLoadCommands(); writeSections(); writeTail(); Index: llvm/tools/llvm-objcopy/MachO/Object.h =================================================================== --- llvm/tools/llvm-objcopy/MachO/Object.h +++ llvm/tools/llvm-objcopy/MachO/Object.h @@ -12,6 +12,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/MC/StringTableBuilder.h" #include "llvm/ObjectYAML/DWARFYAML.h" #include "llvm/Support/YAMLTraits.h" #include @@ -33,6 +34,7 @@ uint32_t Reserved = 0; }; +struct RelocationInfo; struct Section { std::string Sectname; std::string Segname; @@ -48,7 +50,7 @@ uint32_t Reserved3; StringRef Content; - std::vector Relocations; + std::vector Relocations; MachO::SectionType getType() const { return static_cast(Flags & MachO::SECTION_TYPE); @@ -79,8 +81,11 @@ std::vector
Sections; }; -struct NListEntry { - uint32_t n_strx; +// A symbol information. Fields which starts with "n_" are same as them in the +// nlist. +struct SymbolEntry { + std::string Name; + uint32_t Index; uint8_t n_type; uint8_t n_sect; uint16_t n_desc; @@ -90,7 +95,9 @@ /// The location of the symbol table inside the binary is described by LC_SYMTAB /// load command. struct SymbolTable { - std::vector NameList; + std::vector> Symbols; + + const SymbolEntry *getSymbolByIndex(uint32_t Index) const; }; /// The location of the string table inside the binary is described by LC_SYMTAB @@ -99,6 +106,13 @@ std::vector Strings; }; +struct RelocationInfo { + const SymbolEntry *Symbol; + // True if Info is a scattered_relocation_info. + bool Scattered; + MachO::any_relocation_info Info; +}; + /// The location of the rebase info inside the binary is described by /// LC_DYLD_INFO load command. Dyld rebases an image whenever dyld loads it at /// an address different from its preferred address. The rebase information is Index: llvm/tools/llvm-objcopy/MachO/Object.cpp =================================================================== --- /dev/null +++ llvm/tools/llvm-objcopy/MachO/Object.cpp @@ -0,0 +1,15 @@ +#include "Object.h" +#include "../llvm-objcopy.h" + +namespace llvm { +namespace objcopy { +namespace macho { + +const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const { + assert(Index < Symbols.size() && "invalid symbol index"); + return Symbols[Index].get(); +} + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm