Index: llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp +++ llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp @@ -57,7 +57,11 @@ if (Error E = handleArgs(Config, *O)) return createFileError(Config.InputFilename, std::move(E)); - MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out); + // TODO: Support 16KB pages which are employed in iOS arm64 binaries: + // https://github.com/llvm/llvm-project/commit/1bebb2832ee312d3b0316dacff457a7a29435edb + const uint64_t PageSize = 4096; + + MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), PageSize, Out); if (auto E = Writer.finalize()) return E; return Writer.write(); Index: llvm/tools/llvm-objcopy/MachO/MachOReader.h =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOReader.h +++ llvm/tools/llvm-objcopy/MachO/MachOReader.h @@ -36,6 +36,9 @@ void readWeakBindInfo(Object &O) const; void readLazyBindInfo(Object &O) const; void readExportInfo(Object &O) const; + void readDataInCodeData(Object &O) const; + void readFunctionStartsData(Object &O) const; + void readIndirectSymbolTable(Object &O) const; public: explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {} Index: llvm/tools/llvm-objcopy/MachO/MachOReader.cpp =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -123,10 +123,19 @@ case MachO::LC_SYMTAB: O.SymTabCommandIndex = O.LoadCommands.size(); break; + case MachO::LC_DYSYMTAB: + O.DySymTabCommandIndex = O.LoadCommands.size(); + break; case MachO::LC_DYLD_INFO: case MachO::LC_DYLD_INFO_ONLY: O.DyLdInfoCommandIndex = O.LoadCommands.size(); break; + case MachO::LC_DATA_IN_CODE: + O.DataInCodeCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_FUNCTION_STARTS: + O.FunctionStartsCommandIndex = 
O.LoadCommands.size(); + break; } #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ case MachO::LCName: \ @@ -208,6 +217,39 @@ O.Exports.Trie = MachOObj.getDyldInfoExportsTrie(); } +void MachOReader::readDataInCodeData(Object &O) const { + if (!O.DataInCodeCommandIndex) + return; + const MachO::linkedit_data_command &LDC = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + O.DataInCode.Data = ArrayRef( + reinterpret_cast( + const_cast(MachOObj.getData().data()) + LDC.dataoff), + LDC.datasize); +} + +void MachOReader::readFunctionStartsData(Object &O) const { + if (!O.FunctionStartsCommandIndex) + return; + const MachO::linkedit_data_command &LDC = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + O.FunctionStarts.Data = ArrayRef( + reinterpret_cast( + const_cast(MachOObj.getData().data()) + LDC.dataoff), + LDC.datasize); +} + +void MachOReader::readIndirectSymbolTable(Object &O) const { + MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); + for (uint32_t i = 0; i < DySymTab.nindirectsyms; i++) + O.IndirectSymTable.Symbols.push_back( + MachOObj.getIndirectSymbolTableEntry(DySymTab, i)); +} + std::unique_ptr MachOReader::create() const { auto Obj = llvm::make_unique(); readHeader(*Obj); @@ -219,6 +261,9 @@ readWeakBindInfo(*Obj); readLazyBindInfo(*Obj); readExportInfo(*Obj); + readDataInCodeData(*Obj); + readFunctionStartsData(*Obj); + readIndirectSymbolTable(*Obj); return Obj; } Index: llvm/tools/llvm-objcopy/MachO/MachOWriter.h =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOWriter.h +++ llvm/tools/llvm-objcopy/MachO/MachOWriter.h @@ -22,7 +22,10 @@ Object &O; bool Is64Bit; bool IsLittleEndian; + uint64_t PageSize; Buffer &B; + // Points to the __LINKEDIT segment if it exists. 
+ MachO::macho_load_command *LinkEditLoadCommand = nullptr; size_t headerSize() const; size_t loadCommandsSize() const; @@ -30,7 +33,10 @@ size_t strTableSize() const; void updateDySymTab(MachO::macho_load_command &MLC); - void updateSizeOfCmds(); + uint32_t computeSizeOfCmds(); + uint64_t layoutSegments(); + uint64_t layoutRelocations(uint64_t Offset); + Error layoutTail(uint64_t Offset); Error layout(); void writeHeader(); @@ -45,11 +51,16 @@ void writeWeakBindInfo(); void writeLazyBindInfo(); void writeExportInfo(); + void writeIndirectSymbolTable(); + void writeDataInCodeData(); + void writeFunctionStartsData(); void writeTail(); public: - MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, Buffer &B) - : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {} + MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, uint64_t PageSize, + Buffer &B) + : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), + PageSize(PageSize), B(B) {} size_t totalSize() const; Error finalize(); Index: llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -92,6 +92,36 @@ } } + if (O.DySymTabCommandIndex) { + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + if (DySymTabCommand.indirectsymoff) + Ends.push_back(DySymTabCommand.indirectsymoff + + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); + } + + if (O.DataInCodeCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Ends.push_back(LinkEditDataCommand.dataoff + + LinkEditDataCommand.datasize); + } + + if (O.FunctionStartsCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + 
O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Ends.push_back(LinkEditDataCommand.dataoff + + LinkEditDataCommand.datasize); + } + // Otherwise, use the last section / reloction. for (const auto &LC : O.LoadCommands) for (const auto &S : LC.Sections) { @@ -342,6 +372,45 @@ memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size()); } +void MachOWriter::writeIndirectSymbolTable() { + if (!O.DySymTabCommandIndex) + return; + + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + char *Out = (char *)B.getBufferStart() + DySymTabCommand.indirectsymoff; + assert((DySymTabCommand.nindirectsyms == O.IndirectSymTable.Symbols.size()) && + "Incorrect indirect symbol table size"); + memcpy(Out, O.IndirectSymTable.Symbols.data(), + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); +} + +void MachOWriter::writeDataInCodeData() { + if (!O.DataInCodeCommandIndex) + return; + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; + assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) && + "Incorrect data in code data size"); + memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size()); +} + +void MachOWriter::writeFunctionStartsData() { + if (!O.FunctionStartsCommandIndex) + return; + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; + assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) && + "Incorrect function starts data size"); + memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size()); +} + void 
MachOWriter::writeTail() { typedef void (MachOWriter::*WriteHandlerType)(void); typedef std::pair WriteOperation; @@ -377,6 +446,36 @@ {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo}); } + if (O.DySymTabCommandIndex) { + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + if (DySymTabCommand.indirectsymoff) + Queue.push_back({DySymTabCommand.indirectsymoff, + &MachOWriter::writeIndirectSymbolTable}); + } + + if (O.DataInCodeCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Queue.push_back( + {LinkEditDataCommand.dataoff, &MachOWriter::writeDataInCodeData}); + } + + if (O.FunctionStartsCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Queue.push_back( + {LinkEditDataCommand.dataoff, &MachOWriter::writeFunctionStartsData}); + } + llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) { return LHS.first < RHS.first; }); @@ -385,12 +484,11 @@ (this->*WriteOp.second)(); } -void MachOWriter::updateSizeOfCmds() { - auto Size = 0; +uint32_t MachOWriter::computeSizeOfCmds() { + uint32_t Size = 0; for (const auto &LC : O.LoadCommands) { auto &MLC = LC.MachOLoadCommand; auto cmd = MLC.load_command_data.cmd; - switch (cmd) { case MachO::LC_SEGMENT: Size += sizeof(MachO::segment_command) + @@ -405,14 +503,14 @@ switch (cmd) { #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ case MachO::LCName: \ - Size += sizeof(MachO::LCStruct); \ + Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \ break; #include "llvm/BinaryFormat/MachO.def" #undef HANDLE_LOAD_COMMAND } } - O.Header.SizeOfCmds = Size; + return Size; } // Updates the index and the 
number of local/external/undefined symbols. Here we @@ -448,7 +546,7 @@ // Recomputes and updates offset and size fields in load commands and sections // since they could be modified. -Error MachOWriter::layout() { +uint64_t MachOWriter::layoutSegments() { auto SizeOfCmds = loadCommandsSize(); auto Offset = headerSize() + SizeOfCmds; O.Header.NCmds = O.LoadCommands.size(); @@ -457,6 +555,31 @@ // Lay out sections. for (auto &LC : O.LoadCommands) { uint64_t FileOff = Offset; + auto &MLC = LC.MachOLoadCommand; + StringRef Segname; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + Segname = StringRef(MLC.segment_command_data.segname, + strnlen(MLC.segment_command_data.segname, + sizeof(MLC.segment_command_data.segname))); + break; + case MachO::LC_SEGMENT_64: + Segname = StringRef(MLC.segment_command_64_data.segname, + strnlen(MLC.segment_command_64_data.segname, + sizeof(MLC.segment_command_64_data.segname))); + break; + default: + continue; + } + + if (Segname == "__LINKEDIT") { + // We update the __LINKEDIT segment later (in layoutTail). + assert(LC.Sections.empty() && "__LINKEDIT segment has sections"); + LinkEditLoadCommand = &MLC; + continue; + } + + // Update file offsets and sizes of sections. uint64_t VMSize = 0; uint64_t FileOffsetInSegment = 0; for (auto &Sec : LC.Sections) { @@ -472,7 +595,6 @@ } // TODO: Handle the __PAGEZERO segment. - auto &MLC = LC.MachOLoadCommand; switch (MLC.load_command_data.cmd) { case MachO::LC_SEGMENT: MLC.segment_command_data.cmdsize = @@ -497,7 +619,10 @@ Offset += FileOffsetInSegment; } - // Lay out relocations. + return Offset; +} + +uint64_t MachOWriter::layoutRelocations(uint64_t Offset) { for (auto &LC : O.LoadCommands) for (auto &Sec : LC.Sections) { Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset; @@ -505,18 +630,64 @@ Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc; } - // Lay out tail stuff. - auto NListSize = Is64Bit ? 
sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + return Offset; +} + +Error MachOWriter::layoutTail(uint64_t Offset) { + // The order of LINKEDIT elements is as follows: + // rebase info, binding info, weak binding info, lazy binding info, export + // trie, function starts, data-in-code, symbol table, indirect symbol table, + // symbol table strings. + uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + uint64_t StartOfLinkEdit = Offset; + uint64_t StartOfRebaseInfo = StartOfLinkEdit; + uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size(); + uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size(); + uint64_t StartOfLazyBindingInfo = + StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size(); + uint64_t StartOfExportTrie = + StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size(); + uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size(); + uint64_t StartOfDataInCode = + StartOfFunctionStarts + O.FunctionStarts.Data.size(); + uint64_t StartOfSymbols = StartOfDataInCode + O.DataInCode.Data.size(); + uint64_t StartOfIndirectSymbols = + StartOfSymbols + NListSize * O.SymTable.NameList.size(); + uint64_t StartOfSymbolStrings = + StartOfIndirectSymbols + + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size(); + uint64_t LinkEditSize = + (StartOfSymbolStrings + strTableSize()) - StartOfLinkEdit; + + // Now we have determined the layout of the contents of the __LINKEDIT + // segment. Update its load command. 
+ if (LinkEditLoadCommand) { + MachO::macho_load_command *MLC = LinkEditLoadCommand; + switch (LinkEditLoadCommand->load_command_data.cmd) { + case MachO::LC_SEGMENT: + MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command); + MLC->segment_command_data.fileoff = StartOfLinkEdit; + MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize); + MLC->segment_command_data.filesize = LinkEditSize; + break; + case MachO::LC_SEGMENT_64: + MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64); + MLC->segment_command_64_data.fileoff = StartOfLinkEdit; + MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize); + MLC->segment_command_64_data.filesize = LinkEditSize; + break; + } + } + for (auto &LC : O.LoadCommands) { auto &MLC = LC.MachOLoadCommand; auto cmd = MLC.load_command_data.cmd; switch (cmd) { case MachO::LC_SYMTAB: - MLC.symtab_command_data.symoff = Offset; + MLC.symtab_command_data.symoff = StartOfSymbols; MLC.symtab_command_data.nsyms = O.SymTable.NameList.size(); - Offset += NListSize * MLC.symtab_command_data.nsyms; - MLC.symtab_command_data.stroff = Offset; - Offset += MLC.symtab_command_data.strsize; + MLC.symtab_command_data.stroff = StartOfSymbolStrings; + MLC.symtab_command_data.strsize = strTableSize(); break; case MachO::LC_DYSYMTAB: { if (MLC.dysymtab_command_data.ntoc != 0 || @@ -527,13 +698,44 @@ return createStringError(llvm::errc::not_supported, "shared library is not yet supported"); - if (MLC.dysymtab_command_data.nindirectsyms != 0) - return createStringError(llvm::errc::not_supported, - "indirect symbol table is not yet supported"); + if (!O.IndirectSymTable.Symbols.empty()) { + MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols; + MLC.dysymtab_command_data.nindirectsyms = + O.IndirectSymTable.Symbols.size(); + } updateDySymTab(MLC); break; } + case MachO::LC_DATA_IN_CODE: + MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; + MLC.linkedit_data_command_data.datasize = 
O.DataInCode.Data.size(); + break; + case MachO::LC_FUNCTION_STARTS: + MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts; + MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); + break; + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + MLC.dyld_info_command_data.rebase_off = + O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo; + MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size(); + MLC.dyld_info_command_data.bind_off = + O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo; + MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size(); + MLC.dyld_info_command_data.weak_bind_off = + O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo; + MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size(); + MLC.dyld_info_command_data.lazy_bind_off = + O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo; + MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size(); + MLC.dyld_info_command_data.export_off = + O.Exports.Trie.empty() ? 0 : StartOfExportTrie; + MLC.dyld_info_command_data.export_size = O.Exports.Trie.size(); + break; + case MachO::LC_LOAD_DYLINKER: + case MachO::LC_MAIN: + case MachO::LC_RPATH: case MachO::LC_SEGMENT: case MachO::LC_SEGMENT_64: case MachO::LC_VERSION_MIN_MACOSX: @@ -554,8 +756,17 @@ return Error::success(); } +// Recomputes and updates offset and size fields in load commands and sections +// since they could be modified. 
+Error MachOWriter::layout() { + uint64_t Offset = layoutSegments(); + Offset = layoutRelocations(Offset); + return layoutTail(Offset); +} + Error MachOWriter::finalize() { - updateSizeOfCmds(); + O.Header.NCmds = O.LoadCommands.size(); + O.Header.SizeOfCmds = computeSizeOfCmds(); if (auto E = layout()) return E; Index: llvm/tools/llvm-objcopy/MachO/Object.h =================================================================== --- llvm/tools/llvm-objcopy/MachO/Object.h +++ llvm/tools/llvm-objcopy/MachO/Object.h @@ -93,6 +93,10 @@ std::vector NameList; }; +struct IndirectSymbolTable { + std::vector Symbols; +}; + /// The location of the string table inside the binary is described by LC_SYMTAB /// load command. struct StringTable { @@ -192,6 +196,10 @@ ArrayRef Trie; }; +struct LinkData { + ArrayRef Data; +}; + struct Object { MachHeader Header; std::vector LoadCommands; @@ -204,11 +212,20 @@ WeakBindInfo WeakBinds; LazyBindInfo LazyBinds; ExportInfo Exports; + IndirectSymbolTable IndirectSymTable; + LinkData DataInCode; + LinkData FunctionStarts; /// The index of LC_SYMTAB load command if present. Optional SymTabCommandIndex; /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present. Optional DyLdInfoCommandIndex; + /// The index of LC_DYSYMTAB load command if present. + Optional DySymTabCommandIndex; + /// The index of LC_DATA_IN_CODE load command if present. + Optional DataInCodeCommandIndex; + /// The index of LC_FUNCTION_STARTS load command if present. + Optional FunctionStartsCommandIndex; }; } // end namespace macho