Index: llvm/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s
===================================================================
--- /dev/null
+++ llvm/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s
@@ -0,0 +1,23 @@
+# C equivalent: static int i;              // A local symbol (_i below).
+# C equivalent: int f(void) { return i; }  // An external symbol (_f below).
+
+        .section __TEXT,__text,regular,pure_instructions
+        .build_version macos, 10, 14
+        .globl _f                       ## -- Begin function f
+        .p2align 4, 0x90
+_f:                                     ## @f
+        .cfi_startproc
+## %bb.0:
+        pushq %rbp
+        .cfi_def_cfa_offset 16
+        .cfi_offset %rbp, -16
+        movq %rsp, %rbp
+        .cfi_def_cfa_register %rbp
+        movl _i(%rip), %eax
+        popq %rbp
+        retq
+        .cfi_endproc
+                                        ## -- End function
+.zerofill __DATA,__bss,_i,4,2           ## @i
+
+.subsections_via_symbols
Index: llvm/test/tools/llvm-objcopy/MachO/real-world-input-copy.test
===================================================================
--- llvm/test/tools/llvm-objcopy/MachO/real-world-input-copy.test
+++ llvm/test/tools/llvm-objcopy/MachO/real-world-input-copy.test
@@ -6,4 +6,10 @@
 # RUN: llvm-objcopy %t.64.o %t.64.copy.o
 # RUN: cmp %t.64.o %t.64.copy.o
 
+# Check that a plain copy constructs LC_DYSYMTAB properly and handles virtual
+# (zero-fill) sections such as __bss, whose contents are never written out.
+# RUN: llvm-mc -assemble -triple x86_64-apple-darwin9 -filetype=obj %p/Inputs/various-symbols.s -o %t.various-symbols.o
+# RUN: llvm-objcopy %t.various-symbols.o %t.various-symbols.copy.o
+# RUN: cmp %t.various-symbols.o %t.various-symbols.copy.o
+
 # REQUIRES: x86-registered-target
Index: llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
===================================================================
--- llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -58,6 +58,8 @@
     return createFileError(Config.InputFilename, std::move(E));
 
   MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out);
+  if (auto E = Writer.finalize())
+    return E;
   return Writer.write();
 }
 
Index: llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
===================================================================
--- llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
+++ llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
@@ -30,8 +30,11 @@
 template <typename SectionType>
 Section constructSectionCommon(SectionType Sec) {
   Section S;
-  memcpy(S.Sectname, Sec.sectname, sizeof(Sec.sectname));
-  memcpy(S.Segname, Sec.segname, sizeof(Sec.segname));
+  S.Sectname =
+      StringRef(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)))
+          .str();
+  S.Segname =
+      StringRef(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))).str();
   S.Addr = Sec.addr;
   S.Size = Sec.size;
   S.Offset = Sec.offset;
@@ -79,7 +82,6 @@
 
       Section &S = Sections.back();
 
-      StringRef SectName(S.Sectname);
       Expected<object::SectionRef> SecRef =
           MachOObj.getSection(NextSectionIndex++);
       if (!SecRef)
Index: llvm/tools/llvm-objcopy/MachO/MachOWriter.h
===================================================================
--- llvm/tools/llvm-objcopy/MachO/MachOWriter.h
+++ llvm/tools/llvm-objcopy/MachO/MachOWriter.h
@@ -29,8 +29,14 @@
   size_t symTableSize() const;
   size_t strTableSize() const;
 
+  void updateDySymTab(MachO::macho_load_command &MLC);
+  void updateSizeOfCmds();
+  Error layout();
+
   void writeHeader();
   void writeLoadCommands();
+  template <typename StructType>
+  void writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out);
   void writeSections();
   void writeSymbolTable();
   void writeStringTable();
@@ -46,6 +52,7 @@
       : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {}
 
   size_t totalSize() const;
+  Error finalize();
   Error write();
 };
 
Index: llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
===================================================================
--- llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
+++ llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
@@ -11,6 +11,8 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Object/MachO.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
 #include <memory>
 
 namespace llvm {
@@ -128,13 +130,35 @@
 
 void MachOWriter::writeLoadCommands() {
   uint8_t *Begin = B.getBufferStart() + headerSize();
-  MachO::macho_load_command MLC;
   for (const auto &LC : O.LoadCommands) {
+    // Work on a copy so byte-swapping never modifies the in-memory object.
+    MachO::macho_load_command MLC = LC.MachOLoadCommand;
+    switch (MLC.load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      if (IsLittleEndian != sys::IsLittleEndianHost)
+        MachO::swapStruct(MLC.segment_command_data);
+      memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command));
+      Begin += sizeof(MachO::segment_command);
+
+      for (const auto &Sec : LC.Sections)
+        writeSectionInLoadCommand<MachO::section>(Sec, Begin);
+      continue;
+    case MachO::LC_SEGMENT_64:
+      if (IsLittleEndian != sys::IsLittleEndianHost)
+        MachO::swapStruct(MLC.segment_command_64_data);
+      memcpy(Begin, &MLC.segment_command_64_data,
+             sizeof(MachO::segment_command_64));
+      Begin += sizeof(MachO::segment_command_64);
+
+      for (const auto &Sec : LC.Sections)
+        writeSectionInLoadCommand<MachO::section_64>(Sec, Begin);
+      continue;
+    }
+
 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
   case MachO::LCName:                                                          \
     assert(sizeof(MachO::LCStruct) + LC.Payload.size() ==                      \
-           LC.MachOLoadCommand.load_command_data.cmdsize);                     \
-    MLC = LC.MachOLoadCommand;                                                 \
+           MLC.load_command_data.cmdsize);                                     \
     if (IsLittleEndian != sys::IsLittleEndianHost)                             \
       MachO::swapStruct(MLC.LCStruct##_data);                                  \
     memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct));              \
@@ -143,11 +167,11 @@
     Begin += LC.Payload.size();                                                \
     break;
 
-    switch (LC.MachOLoadCommand.load_command_data.cmd) {
+    // Non-segment commands: copy the fixed struct and its payload unchanged.
+    switch (MLC.load_command_data.cmd) {
     default:
       assert(sizeof(MachO::load_command) + LC.Payload.size() ==
-             LC.MachOLoadCommand.load_command_data.cmdsize);
-      MLC = LC.MachOLoadCommand;
+             MLC.load_command_data.cmdsize);
       if (IsLittleEndian != sys::IsLittleEndianHost)
         MachO::swapStruct(MLC.load_command_data);
       memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command));
@@ -160,9 +184,39 @@
   }
 }
 
+// Serializes Sec as a StructType (MachO::section or MachO::section_64) at Out,
+// byte-swapping it if needed, and advances Out past the written struct.
+template <typename StructType>
+void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) {
+  StructType Temp;
+  assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name");
+  assert(Sec.Sectname.size() <= sizeof(Temp.sectname) &&
+         "too long section name");
+  memset(&Temp, 0, sizeof(StructType));
+  memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size());
+  memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size());
+  Temp.addr = Sec.Addr;
+  Temp.size = Sec.Size;
+  Temp.offset = Sec.Offset;
+  Temp.align = Sec.Align;
+  Temp.reloff = Sec.RelOff;
+  Temp.nreloc = Sec.NReloc;
+  Temp.flags = Sec.Flags;
+  Temp.reserved1 = Sec.Reserved1;
+  Temp.reserved2 = Sec.Reserved2;
+
+  if (IsLittleEndian != sys::IsLittleEndianHost)
+    MachO::swapStruct(Temp);
+  memcpy(Out, &Temp, sizeof(StructType));
+  Out += sizeof(StructType);
+}
+
 void MachOWriter::writeSections() {
   for (const auto &LC : O.LoadCommands)
     for (const auto &Sec : LC.Sections) {
+      if (Sec.isVirtualSection())
+        continue;
+
       assert(Sec.Offset && "Section offset can not be zero");
       assert((Sec.Size == Sec.Content.size()) && "Incorrect section size");
       memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(),
@@ -333,6 +387,186 @@
     (this->*WriteOp.second)();
 }
 
+// Recomputes Header.SizeOfCmds as the total size of all load commands: the
+// fixed struct plus either the section headers (segments) or the payload.
+void MachOWriter::updateSizeOfCmds() {
+  uint32_t Size = 0;
+  for (const auto &LC : O.LoadCommands) {
+    auto &MLC = LC.MachOLoadCommand;
+    auto cmd = MLC.load_command_data.cmd;
+
+    switch (cmd) {
+    case MachO::LC_SEGMENT:
+      Size += sizeof(MachO::segment_command) +
+              sizeof(MachO::section) * LC.Sections.size();
+      continue;
+    case MachO::LC_SEGMENT_64:
+      Size += sizeof(MachO::segment_command_64) +
+              sizeof(MachO::section_64) * LC.Sections.size();
+      continue;
+    }
+
+    switch (cmd) {
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
+  case MachO::LCName:                                                          \
+    Size += sizeof(MachO::LCStruct) + LC.Payload.size();                       \
+    break;
+#include "llvm/BinaryFormat/MachO.def"
+#undef HANDLE_LOAD_COMMAND
+    }
+  }
+
+  O.Header.SizeOfCmds = Size;
+}
+
+// Updates the start index and count of the local, externally defined and
+// undefined symbol groups. MLC must be an LC_DYSYMTAB command, and the nlist
+// entries in the symbol table must already be sorted into those three groups.
+void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) {
+  uint32_t NumLocalSymbols = 0;
+  auto Iter = O.SymTable.NameList.begin();
+  auto End = O.SymTable.NameList.end();
+  for (; Iter != End; Iter++) {
+    if (Iter->n_type & (MachO::N_EXT | MachO::N_PEXT))
+      break;
+
+    NumLocalSymbols++;
+  }
+
+  uint32_t NumExtDefSymbols = 0;
+  for (; Iter != End; Iter++) {
+    if ((Iter->n_type & MachO::N_TYPE) == MachO::N_UNDF)
+      break;
+
+    NumExtDefSymbols++;
+  }
+
+  MLC.dysymtab_command_data.ilocalsym = 0;
+  MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
+  MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
+  MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
+  MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
+  MLC.dysymtab_command_data.nundefsym =
+      O.SymTable.NameList.size() - (NumLocalSymbols + NumExtDefSymbols);
+}
+
+// Recomputes and updates offset and size fields in load commands and sections
+// since they could be modified.
+Error MachOWriter::layout() {
+  auto SizeOfCmds = loadCommandsSize();
+  auto Offset = headerSize() + SizeOfCmds;
+  O.Header.NCmds = O.LoadCommands.size();
+  O.Header.SizeOfCmds = SizeOfCmds;
+
+  // Lay out section contents; virtual sections get no file offset here.
+  for (auto &LC : O.LoadCommands) {
+    uint64_t FileOff = Offset;
+    uint64_t VMSize = 0;
+    uint64_t FileOffsetInSegment = 0;
+    for (auto &Sec : LC.Sections) {
+      if (!Sec.isVirtualSection()) {
+        auto FilePaddingSize =
+            OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align);
+        Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
+        Sec.Size = Sec.Content.size();
+        FileOffsetInSegment += FilePaddingSize + Sec.Size;
+      }
+
+      VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
+    }
+
+    // TODO: Handle the __PAGEZERO segment.
+    auto &MLC = LC.MachOLoadCommand;
+    switch (MLC.load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      MLC.segment_command_data.cmdsize =
+          sizeof(MachO::segment_command) +
+          sizeof(MachO::section) * LC.Sections.size();
+      MLC.segment_command_data.nsects = LC.Sections.size();
+      MLC.segment_command_data.fileoff = FileOff;
+      MLC.segment_command_data.vmsize = VMSize;
+      MLC.segment_command_data.filesize = FileOffsetInSegment;
+      break;
+    case MachO::LC_SEGMENT_64:
+      MLC.segment_command_64_data.cmdsize =
+          sizeof(MachO::segment_command_64) +
+          sizeof(MachO::section_64) * LC.Sections.size();
+      MLC.segment_command_64_data.nsects = LC.Sections.size();
+      MLC.segment_command_64_data.fileoff = FileOff;
+      MLC.segment_command_64_data.vmsize = VMSize;
+      MLC.segment_command_64_data.filesize = FileOffsetInSegment;
+      break;
+    }
+
+    Offset += FileOffsetInSegment;
+  }
+
+  // Lay out relocation entries right after all section contents.
+  for (auto &LC : O.LoadCommands)
+    for (auto &Sec : LC.Sections) {
+      Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
+      Sec.NReloc = Sec.Relocations.size();
+      Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
+    }
+
+  // Lay out the symbol table followed by the string table.
+  auto NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
+  for (auto &LC : O.LoadCommands) {
+    auto &MLC = LC.MachOLoadCommand;
+    auto cmd = MLC.load_command_data.cmd;
+    switch (cmd) {
+    case MachO::LC_SYMTAB:
+      MLC.symtab_command_data.symoff = Offset;
+      MLC.symtab_command_data.nsyms = O.SymTable.NameList.size();
+      Offset += NListSize * MLC.symtab_command_data.nsyms;
+      MLC.symtab_command_data.stroff = Offset;
+      Offset += MLC.symtab_command_data.strsize;
+      break;
+    case MachO::LC_DYSYMTAB: {
+      if (MLC.dysymtab_command_data.ntoc != 0 ||
+          MLC.dysymtab_command_data.nmodtab != 0 ||
+          MLC.dysymtab_command_data.nextrefsyms != 0 ||
+          MLC.dysymtab_command_data.nlocrel != 0 ||
+          MLC.dysymtab_command_data.nextrel != 0)
+        return createStringError(llvm::errc::not_supported,
+                                 "shared library is not yet supported");
+
+      if (MLC.dysymtab_command_data.nindirectsyms != 0)
+        return createStringError(llvm::errc::not_supported,
+                                 "indirect symbol table is not yet supported");
+
+      updateDySymTab(MLC);
+      break;
+    }
+    case MachO::LC_SEGMENT:
+    case MachO::LC_SEGMENT_64:
+    case MachO::LC_VERSION_MIN_MACOSX:
+    case MachO::LC_BUILD_VERSION:
+    case MachO::LC_ID_DYLIB:
+    case MachO::LC_LOAD_DYLIB:
+    case MachO::LC_UUID:
+    case MachO::LC_SOURCE_VERSION:
+      // No offsets to fix up here (segments were handled in the first loop).
+      break;
+    default:
+      // Abort if it's unsupported in order to prevent corrupting the object.
+      return createStringError(llvm::errc::not_supported,
+                               "unsupported load command (cmd=0x%x)", cmd);
+    }
+  }
+
+  return Error::success();
+}
+
+Error MachOWriter::finalize() {
+  updateSizeOfCmds();
+
+  if (auto E = layout())
+    return E;
+
+  return Error::success();
+}
+
 Error MachOWriter::write() {
   if (Error E = B.allocate(totalSize()))
     return E;
Index: llvm/tools/llvm-objcopy/MachO/Object.h
===================================================================
--- llvm/tools/llvm-objcopy/MachO/Object.h
+++ llvm/tools/llvm-objcopy/MachO/Object.h
@@ -34,8 +34,8 @@
 };
 
 struct Section {
-  char Sectname[16];
-  char Segname[16];
+  std::string Sectname;
+  std::string Segname;
   uint64_t Addr;
   uint64_t Size;
   uint32_t Offset;
@@ -49,6 +49,18 @@
 
   StringRef Content;
   std::vector<MachO::any_relocation_info> Relocations;
+
+  /// Returns the section type, i.e. Flags masked with MachO::SECTION_TYPE.
+  MachO::SectionType getType() const {
+    return static_cast<MachO::SectionType>(Flags & MachO::SECTION_TYPE);
+  }
+
+  /// True for zero-fill sections (S_ZEROFILL and its GB/thread-local variants).
+  bool isVirtualSection() const {
+    return (getType() == MachO::S_ZEROFILL ||
+            getType() == MachO::S_GB_ZEROFILL ||
+            getType() == MachO::S_THREAD_LOCAL_ZEROFILL);
+  }
 };
 
 struct LoadCommand {