Index: llvm/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s =================================================================== --- /dev/null +++ llvm/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s @@ -0,0 +1,64 @@ +# static int local1, local2; // Local Symbols. +# int global1; // A common symbol. +# char global2 = 123; // An extern symbol. +# int global3 = 456; // An extern symbol. +# extern int extern1, extern2, extern3, extern4; // Undefined symbols. +# int func(void); // An undefined symbol. +# +# int sum() { +# return global1 + global2 + local1 + local2 + +# extern1 + extern2 + extern3 + extern4 + func(); +# } + + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 10, 14 + .globl _sum ## -- Begin function sum + .p2align 4, 0x90 +_sum: ## @sum + .cfi_startproc +## %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movq _extern4@GOTPCREL(%rip), %rax + movq _extern3@GOTPCREL(%rip), %rcx + movq _extern2@GOTPCREL(%rip), %rdx + movq _extern1@GOTPCREL(%rip), %rsi + movq _global1@GOTPCREL(%rip), %rdi + movl (%rdi), %r8d + movsbl _global2(%rip), %r9d + addl %r9d, %r8d + addl _local1(%rip), %r8d + addl _local2(%rip), %r8d + addl (%rsi), %r8d + addl (%rdx), %r8d + addl (%rcx), %r8d + addl (%rax), %r8d + movl %r8d, -4(%rbp) ## 4-byte Spill + callq _func + movl -4(%rbp), %r8d ## 4-byte Reload + addl %eax, %r8d + movl %r8d, %eax + addq $16, %rsp + popq %rbp + retq + .cfi_endproc + ## -- End function + .section __DATA,__data + .globl _global2 ## @global2 +_global2: + .byte 123 ## 0x7b + + .globl _global3 ## @global3 + .p2align 2 +_global3: + .long 456 ## 0x1c8 + + .comm _global1,4,2 ## @global1 +.zerofill __DATA,__bss,_local1,4,2 ## @local1 +.zerofill __DATA,__bss,_local2,4,2 ## @local2 + +.subsections_via_symbols Index: llvm/test/tools/llvm-objcopy/MachO/real-world-input-copy.test =================================================================== --- 
llvm/test/tools/llvm-objcopy/MachO/real-world-input-copy.test +++ llvm/test/tools/llvm-objcopy/MachO/real-world-input-copy.test @@ -6,4 +6,10 @@ # RUN: llvm-objcopy %t.64.o %t.64.copy.o # RUN: cmp %t.64.o %t.64.copy.o +# Make sure that it properly constructs LC_DYSYMTAB and handles virtual sections +# (the bss section for example). +# RUN: llvm-mc -assemble -triple x86_64-apple-darwin9 -filetype=obj %p/Inputs/various-symbols.s -o %t.some-symbols.o +# RUN: llvm-objcopy %t.some-symbols.o %t.some-symbols.copy.o +# RUN: cmp %t.some-symbols.o %t.some-symbols.copy.o + # REQUIRES: x86-registered-target Index: llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp +++ llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp @@ -58,6 +58,8 @@ return createFileError(Config.InputFilename, std::move(E)); MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out); + if (auto E = Writer.finalize()) + return E; return Writer.write(); } Index: llvm/tools/llvm-objcopy/MachO/MachOReader.cpp =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -17,6 +17,15 @@ namespace objcopy { namespace macho { +// Returns the length of the string. If Str is terminated by NUL, return +// strlen(Str), otherwise return MaxLen. 
+size_t strlenOrMaxLen(const char *Str, size_t MaxLen) { + if (Str[MaxLen - 1] != '\0') + return MaxLen; + + return strlen(Str); +} + void MachOReader::readHeader(Object &O) const { O.Header.Magic = MachOObj.getHeader().magic; O.Header.CPUType = MachOObj.getHeader().cputype; @@ -30,8 +39,8 @@ template Section constructSectionCommon(SectionType Sec) { Section S; - memcpy(S.Sectname, Sec.sectname, sizeof(Sec.sectname)); - memcpy(S.Segname, Sec.segname, sizeof(Sec.segname)); + S.Sectname = StringRef(Sec.sectname, strlenOrMaxLen(Sec.sectname, 16)).str(); + S.Segname = StringRef(Sec.segname, strlenOrMaxLen(Sec.segname, 16)).str(); S.Addr = Sec.addr; S.Size = Sec.size; S.Offset = Sec.offset; @@ -79,7 +88,6 @@ Section &S = Sections.back(); - StringRef SectName(S.Sectname); Expected SecRef = MachOObj.getSection(NextSectionIndex++); if (!SecRef) Index: llvm/tools/llvm-objcopy/MachO/MachOWriter.h =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOWriter.h +++ llvm/tools/llvm-objcopy/MachO/MachOWriter.h @@ -29,6 +29,10 @@ size_t symTableSize() const; size_t strTableSize() const; + void updateDysymtab(MachO::macho_load_command &MLC); + void updateSizeOfCmds(); + Error layout(); + void writeHeader(); void writeLoadCommands(); void writeSections(); @@ -46,6 +50,7 @@ : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {} size_t totalSize() const; + Error finalize(); Error write(); }; Index: llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp =================================================================== --- llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -11,6 +11,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/Object/MachO.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" #include namespace llvm { @@ -126,15 +128,78 @@ memcpy(B.getBufferStart(), &Header, HeaderSize); } +static void 
copyStringWithPadding(char *Dest, StringRef Src, size_t DestLen) { + size_t CopyLen = std::min(Src.size(), DestLen); + memcpy(reinterpret_cast(Dest), Src.data(), CopyLen); + memset(reinterpret_cast(Dest + CopyLen), '\0', DestLen - CopyLen); +} + void MachOWriter::writeLoadCommands() { uint8_t *Begin = B.getBufferStart() + headerSize(); - MachO::macho_load_command MLC; for (const auto &LC : O.LoadCommands) { + // Construct a load command. + auto MLC = LC.MachOLoadCommand; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(MLC.segment_command_data); + memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command)); + Begin += sizeof(MachO::segment_command); + + for (auto &Sec : LC.Sections) { + struct MachO::section Temp; + copyStringWithPadding(Temp.sectname, Sec.Sectname, 16); + copyStringWithPadding(Temp.segname, Sec.Segname, 16); + Temp.addr = Sec.Addr; + Temp.size = Sec.Size; + Temp.offset = Sec.Offset; + Temp.align = Sec.Align; + Temp.reloff = Sec.RelOff; + Temp.nreloc = Sec.NReloc; + Temp.flags = Sec.Flags; + Temp.reserved1 = Sec.Reserved1; + Temp.reserved2 = Sec.Reserved2; + + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(Temp); + memcpy(Begin, &Temp, sizeof(MachO::section)); + Begin += sizeof(MachO::section); + } + continue; + case MachO::LC_SEGMENT_64: + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(MLC.segment_command_64_data); + memcpy(Begin, &MLC.segment_command_64_data, + sizeof(MachO::segment_command_64)); + Begin += sizeof(MachO::segment_command_64); + + for (auto &Sec : LC.Sections) { + struct MachO::section_64 Temp; + copyStringWithPadding(Temp.sectname, Sec.Sectname, 16); + copyStringWithPadding(Temp.segname, Sec.Segname, 16); + Temp.addr = Sec.Addr; + Temp.size = Sec.Size; + Temp.offset = Sec.Offset; + Temp.align = Sec.Align; + Temp.reloff = Sec.RelOff; + Temp.nreloc = Sec.NReloc; + Temp.flags = Sec.Flags; + 
Temp.reserved1 = Sec.Reserved1; + Temp.reserved2 = Sec.Reserved2; + Temp.reserved3 = Sec.Reserved3; + + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(Temp); + memcpy(Begin, &Temp, sizeof(MachO::section_64)); + Begin += sizeof(MachO::section_64); + } + continue; + } + #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ case MachO::LCName: \ assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \ - LC.MachOLoadCommand.load_command_data.cmdsize); \ - MLC = LC.MachOLoadCommand; \ + MLC.load_command_data.cmdsize); \ if (IsLittleEndian != sys::IsLittleEndianHost) \ MachO::swapStruct(MLC.LCStruct##_data); \ memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \ @@ -143,11 +208,11 @@ Begin += LC.Payload.size(); \ break; - switch (LC.MachOLoadCommand.load_command_data.cmd) { + // Copy the load command as it is. + switch (MLC.load_command_data.cmd) { default: assert(sizeof(MachO::load_command) + LC.Payload.size() == - LC.MachOLoadCommand.load_command_data.cmdsize); - MLC = LC.MachOLoadCommand; + MLC.load_command_data.cmdsize); if (IsLittleEndian != sys::IsLittleEndianHost) MachO::swapStruct(MLC.load_command_data); memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command)); @@ -163,6 +228,9 @@ void MachOWriter::writeSections() { for (const auto &LC : O.LoadCommands) for (const auto &Sec : LC.Sections) { + if (Sec.isVirtualSection()) + continue; + assert(Sec.Offset && "Section offset can not be zero"); assert((Sec.Size == Sec.Content.size()) && "Incorrect section size"); memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(), @@ -333,6 +401,184 @@ (this->*WriteOp.second)(); } +void MachOWriter::updateSizeOfCmds() { + auto Size = 0; + for (auto &LC : O.LoadCommands) { + auto &MLC = LC.MachOLoadCommand; + auto cmd = MLC.load_command_data.cmd; + + switch (cmd) { + case MachO::LC_SEGMENT: + Size += sizeof(MachO::segment_command) + + sizeof(MachO::section) * LC.Sections.size(); + continue; + case MachO::LC_SEGMENT_64: + Size += 
sizeof(MachO::segment_command_64) + + sizeof(MachO::section_64) * LC.Sections.size(); + continue; + } + + switch (cmd) { +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + Size += sizeof(MachO::LCStruct); \ + break; +#include "llvm/BinaryFormat/MachO.def" +#undef HANDLE_LOAD_COMMAND + } + } + + O.Header.SizeOfCmds = Size; +} + +// Updates the index and the number of local/external/undefined symbols. Here we +// assume that MLC is an LC_DYSYMTAB and the nlist entries in the symbol table +// are already sorted by those types. +void MachOWriter::updateDysymtab(MachO::macho_load_command &MLC) { + auto nlocalsym = 0; + auto Iter = O.SymTable.NameList.begin(); + auto End = O.SymTable.NameList.end(); + for (; Iter != End; Iter++) { + if (Iter->n_type & (MachO::N_EXT | MachO::N_PEXT)) + break; + + nlocalsym++; + } + + auto nextdefsym = 0; + for (; Iter != End; Iter++) { + if ((Iter->n_type & MachO::N_TYPE) == MachO::N_UNDF) + break; + + nextdefsym++; + } + + MLC.dysymtab_command_data.ilocalsym = 0; + MLC.dysymtab_command_data.nlocalsym = nlocalsym; + MLC.dysymtab_command_data.iextdefsym = nlocalsym; + MLC.dysymtab_command_data.nextdefsym = nextdefsym; + MLC.dysymtab_command_data.iundefsym = nlocalsym + nextdefsym; + MLC.dysymtab_command_data.nundefsym = + O.SymTable.NameList.size() - (nlocalsym + nextdefsym); +} + +// Recomputes and updates offset and size fields in load commands and sections +// since they could be modified. +Error MachOWriter::layout() { + auto SizeOfCmds = loadCommandsSize(); + auto Offset = headerSize() + SizeOfCmds; + O.Header.NCmds = O.LoadCommands.size(); + O.Header.SizeOfCmds = SizeOfCmds; + + // Lay out sections. 
+ for (auto &LC : O.LoadCommands) { + uint64_t FileOff = Offset; + uint64_t VmOffsetInSegment = 0; + uint64_t FileOffsetInSegment = 0; + for (auto &Sec : LC.Sections) { + auto Align = pow(2, Sec.Align); + if (!Sec.isVirtualSection()) { + auto FilePaddingSize = OffsetToAlignment(FileOffsetInSegment, Align); + Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize; + Sec.Size = Sec.Content.size(); + FileOffsetInSegment += FilePaddingSize + Sec.Size; + } + auto VmPaddingSize = OffsetToAlignment(VmOffsetInSegment, Align); + VmOffsetInSegment += VmPaddingSize + Sec.Size; + } + + // TODO: Handle the __PAGEZERO segment. + auto &MLC = LC.MachOLoadCommand; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + MLC.segment_command_data.cmdsize = + sizeof(MachO::segment_command) + + sizeof(MachO::section) * LC.Sections.size(); + MLC.segment_command_data.nsects = LC.Sections.size(); + MLC.segment_command_data.fileoff = FileOff; + MLC.segment_command_data.vmsize = VmOffsetInSegment; + MLC.segment_command_data.filesize = FileOffsetInSegment; + break; + case MachO::LC_SEGMENT_64: + MLC.segment_command_64_data.cmdsize = + sizeof(MachO::segment_command_64) + + sizeof(MachO::section_64) * LC.Sections.size(); + MLC.segment_command_64_data.nsects = LC.Sections.size(); + MLC.segment_command_64_data.fileoff = FileOff; + MLC.segment_command_64_data.vmsize = VmOffsetInSegment; + MLC.segment_command_64_data.filesize = FileOffsetInSegment; + break; + } + + Offset += FileOffsetInSegment; + } + + // Lay out relocations. + for (auto &LC : O.LoadCommands) + for (auto &Sec : LC.Sections) { + Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset; + Sec.NReloc = Sec.Relocations.size(); + Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc; + } + + // Lay out tail stuff. + auto NListSize = Is64Bit ? 
sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + for (auto &LC : O.LoadCommands) { + auto &MLC = LC.MachOLoadCommand; + auto cmd = MLC.load_command_data.cmd; + switch (cmd) { + case MachO::LC_SYMTAB: + MLC.symtab_command_data.symoff = Offset; + MLC.symtab_command_data.nsyms = O.SymTable.NameList.size(); + Offset += NListSize * MLC.symtab_command_data.nsyms; + MLC.symtab_command_data.stroff = Offset; + Offset += MLC.symtab_command_data.strsize; + break; + case MachO::LC_DYSYMTAB: { + if (MLC.dysymtab_command_data.ntoc != 0 || + MLC.dysymtab_command_data.nmodtab != 0 || + MLC.dysymtab_command_data.nextrefsyms != 0 || + MLC.dysymtab_command_data.nlocrel != 0 || + MLC.dysymtab_command_data.nextrel != 0) + return createStringError(llvm::errc::not_supported, + "shared library is not yet supported"); + + if (MLC.dysymtab_command_data.nindirectsyms != 0) + return createStringError(llvm::errc::not_supported, + "indirect symbol table is not yet supported"); + + updateDysymtab(MLC); + break; + } + case MachO::LC_SEGMENT: + case MachO::LC_SEGMENT_64: + case MachO::LC_VERSION_MIN_MACOSX: + case MachO::LC_BUILD_VERSION: + case MachO::LC_ID_DYLIB: + case MachO::LC_LOAD_DYLIB: + case MachO::LC_UUID: + case MachO::LC_SOURCE_VERSION: + // Nothing to update. + break; + default: + // Abort if it's unsupported in order to prevent corrupting the object. 
+ return createStringError(llvm::errc::not_supported, + "unsupported load command (cmd=0x%x)", cmd); + } + } + + return Error::success(); +} + +Error MachOWriter::finalize() { + updateSizeOfCmds(); + + if (auto E = layout()) + return E; + + return Error::success(); +} + Error MachOWriter::write() { if (Error E = B.allocate(totalSize())) return E; Index: llvm/tools/llvm-objcopy/MachO/Object.h =================================================================== --- llvm/tools/llvm-objcopy/MachO/Object.h +++ llvm/tools/llvm-objcopy/MachO/Object.h @@ -34,8 +34,8 @@ }; struct Section { - char Sectname[16]; - char Segname[16]; + std::string Sectname; + std::string Segname; uint64_t Addr; uint64_t Size; uint32_t Offset; @@ -49,6 +49,16 @@ StringRef Content; std::vector Relocations; + + MachO::SectionType getType() const { + return static_cast(Flags & MachO::SECTION_TYPE); + } + + bool isVirtualSection() const { + return (getType() == MachO::S_ZEROFILL || + getType() == MachO::S_GB_ZEROFILL || + getType() == MachO::S_THREAD_LOCAL_ZEROFILL); + } }; struct LoadCommand {