diff --git a/llvm/test/tools/llvm-objcopy/MachO/binary-input.test b/llvm/test/tools/llvm-objcopy/MachO/binary-input.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/MachO/binary-input.test @@ -0,0 +1,102 @@ +# Show that if -I binary is given, llvm-objcopy creates a MachO object which +# contains the input file contents. + +RUN: not llvm-objcopy -Ibinary -B x86-64 -Omach-o-x86-64 %t.nonexistent %t.o.nonexistent 2>&1 \ +RUN: | FileCheck %s -DINPUT=%t.nonexistent --check-prefix=NONEXSITENT-FILE +NONEXSITENT-FILE: error: '[[INPUT]]': {{[Nn]}}o such file or directory + +RUN: echo -n abcdefg > %t.input +RUN: llvm-objcopy -Ibinary -B x86-64 -Omach-o-x86-64 %t.input %t.o +RUN: llvm-readobj --sections --section-data --symbols --macho-segment --macho-dysymtab %t.o \ +RUN: | FileCheck %s + +CHECK: Format: Mach-O 64-bit x86-64 +CHECK-NEXT: Arch: x86_64 +CHECK-NEXT: AddressSize: 64bit +CHECK-NEXT: Sections [ +CHECK-NEXT: Section { +CHECK-NEXT: Index: 0 +CHECK-NEXT: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00) +CHECK-NEXT: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00) +CHECK-NEXT: Address: 0x0 +CHECK-NEXT: Size: 0x7 +CHECK-NEXT: Offset: 288 +CHECK-NEXT: Alignment: 0 +CHECK-NEXT: RelocationOffset: 0x0 +CHECK-NEXT: RelocationCount: 0 +CHECK-NEXT: Type: Regular (0x0) +CHECK-NEXT: Attributes [ (0x0) +CHECK-NEXT: ] +CHECK-NEXT: Reserved1: 0x0 +CHECK-NEXT: Reserved2: 0x0 +CHECK-NEXT: Reserved3: 0x0 +CHECK-NEXT: SectionData ( +CHECK-NEXT: 0000: 61626364 656667 |abcdefg| +CHECK-NEXT: ) +CHECK-NEXT: } +CHECK-NEXT: ] +CHECK-NEXT: Symbols [ +CHECK-NEXT: Symbol { +CHECK-NEXT: Name: __binary_{{[_a-zA-Z0-9]*}}_size +CHECK-NEXT: Extern +CHECK-NEXT: Type: Abs (0x2) +CHECK-NEXT: Section: (0x0) +CHECK-NEXT: RefType: ReferenceFlagDefined (0x2) +CHECK-NEXT: Flags [ (0x0) +CHECK-NEXT: ] +CHECK-NEXT: Value: 0x7 +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Name: __binary_{{[_a-zA-Z0-9]*}}_start +CHECK-NEXT: Extern +CHECK-NEXT: Type: 
Section (0xE) +CHECK-NEXT: Section: __data (0x1) +CHECK-NEXT: RefType: ReferenceFlagDefined (0x2) +CHECK-NEXT: Flags [ (0x0) +CHECK-NEXT: ] +CHECK-NEXT: Value: 0x0 +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Name: __binary_{{[_a-zA-Z0-9]*}}_end +CHECK-NEXT: Extern +CHECK-NEXT: Type: Section (0xE) +CHECK-NEXT: Section: __data (0x1) +CHECK-NEXT: RefType: ReferenceFlagDefined (0x2) +CHECK-NEXT: Flags [ (0x0) +CHECK-NEXT: ] +CHECK-NEXT: Value: 0x7 +CHECK-NEXT: } +CHECK-NEXT: ] +CHECK-NEXT: Segment { +CHECK-NEXT: Cmd: LC_SEGMENT_64 +CHECK-NEXT: Name: +CHECK-NEXT: Size: 152 +CHECK-NEXT: vmaddr: 0x0 +CHECK-NEXT: vmsize: 0x7 +CHECK-NEXT: fileoff: 288 +CHECK-NEXT: filesize: 7 +CHECK-NEXT: maxprot: rwx +CHECK-NEXT: initprot: rwx +CHECK-NEXT: nsects: 1 +CHECK-NEXT: flags: 0x0 +CHECK-NEXT: } +CHECK-NEXT: Dysymtab { +CHECK-NEXT: ilocalsym: 0 +CHECK-NEXT: nlocalsym: 0 +CHECK-NEXT: iextdefsym: 0 +CHECK-NEXT: nextdefsym: 3 +CHECK-NEXT: iundefsym: 3 +CHECK-NEXT: nundefsym: 0 +CHECK-NEXT: tocoff: 0 +CHECK-NEXT: ntoc: 0 +CHECK-NEXT: modtaboff: 0 +CHECK-NEXT: nmodtab: 0 +CHECK-NEXT: extrefsymoff: 0 +CHECK-NEXT: nextrefsyms: 0 +CHECK-NEXT: indirectsymoff: 0 +CHECK-NEXT: nindirectsyms: 0 +CHECK-NEXT: extreloff: 0 +CHECK-NEXT: nextrel: 0 +CHECK-NEXT: locreloff: 0 +CHECK-NEXT: nlocrel: 0 +CHECK-NEXT: } \ No newline at end of file diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h --- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h +++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h @@ -9,8 +9,11 @@ #ifndef LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H #define LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H +#include "llvm/Support/MemoryBuffer.h" + namespace llvm { class Error; +class MemoryBuffer; namespace object { class MachOObjectFile; @@ -22,6 +25,8 @@ class Buffer; namespace macho { +Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In, + Buffer &Out); Error executeObjcopyOnBinary(const CopyConfig &Config, object::MachOObjectFile 
&In, Buffer &Out); } // end namespace macho diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp --- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp @@ -166,6 +166,29 @@ return Error::success(); } +Error writeOutput(Object &O, Buffer &Out, bool Is64Bit, bool IsLittleEndian) { + // TODO: Support 16KB pages which are employed in iOS arm64 binaries: + // https://github.com/llvm/llvm-project/commit/1bebb2832ee312d3b0316dacff457a7a29435edb + const uint64_t PageSize = 4096; + + MachOWriter Writer(O, Is64Bit, IsLittleEndian, PageSize, Out); + if (auto E = Writer.finalize()) + return E; + return Writer.write(); +} + +Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In, + Buffer &Out) { + const MachineInfo &MI = Config.BinaryArch; + BinaryReader Reader(MI, In); + std::unique_ptr O = Reader.create(); + + if (Error E = handleArgs(Config, *O)) + return createFileError(Config.InputFilename, std::move(E)); + + return writeOutput(*O, Out, MI.Is64Bit, MI.IsLittleEndian); +} + Error executeObjcopyOnBinary(const CopyConfig &Config, object::MachOObjectFile &In, Buffer &Out) { MachOReader Reader(In); @@ -179,14 +202,7 @@ if (Error E = handleArgs(Config, *O)) return createFileError(Config.InputFilename, std::move(E)); - // TODO: Support 16KB pages which are employed in iOS arm64 binaries: - // https://github.com/llvm/llvm-project/commit/1bebb2832ee312d3b0316dacff457a7a29435edb - const uint64_t PageSize = 4096; - - MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), PageSize, Out); - if (auto E = Writer.finalize()) - return E; - return Writer.write(); + return writeOutput(*O, Out, In.is64Bit(), In.isLittleEndian()); } } // end namespace macho diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.h b/llvm/tools/llvm-objcopy/MachO/MachOReader.h --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.h +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.h @@ -6,10 
+6,12 @@ // //===----------------------------------------------------------------------===// +#include "../CopyConfig.h" #include "MachOObjcopy.h" #include "Object.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/Object/MachO.h" +#include "llvm/Support/MemoryBuffer.h" #include namespace llvm { @@ -46,6 +48,20 @@ std::unique_ptr create() const override; }; +class BinaryReader : public Reader { + const MachineInfo &MI; + MemoryBuffer &Input; + std::unique_ptr SectionContent; + +public: + BinaryReader(const MachineInfo &MI, MemoryBuffer &Input) + : MI(MI), Input(Input) { + SectionContent = + WritableMemoryBuffer::getNewUninitMemBuffer(Input.getBufferSize()); + } + std::unique_ptr create() const override; +}; + } // end namespace macho } // end namespace objcopy } // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -173,13 +173,8 @@ SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { assert(nlist.n_strx < StrTable.size() && "n_strx exceeds the size of the string table"); - SymbolEntry SE; - SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); - SE.n_type = nlist.n_type; - SE.n_sect = nlist.n_sect; - SE.n_desc = nlist.n_desc; - SE.n_value = nlist.n_value; - return SE; + return SymbolEntry(StringRef(StrTable.data() + nlist.n_strx).str(), + nlist.n_type, nlist.n_sect, nlist.n_desc, nlist.n_value); } void MachOReader::readSymbolTable(Object &O) const { @@ -280,6 +275,84 @@ return Obj; } +std::unique_ptr BinaryReader::create() const { + assert(MI.Is64Bit && "32-bit object is not yet supported"); + auto Obj = std::make_unique(); + + size_t HeaderSize = + MI.Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + size_t SegLoadCommandSize = + MI.Is64Bit + ? 
(sizeof(MachO::segment_command_64) + sizeof(MachO::section_64)) + : (sizeof(MachO::segment_command) + sizeof(MachO::section)); + size_t SizeOfCmds = SegLoadCommandSize + sizeof(MachO::symtab_command) + + sizeof(MachO::dysymtab_command); + uint64_t SegOffset = HeaderSize + SizeOfCmds; + Obj->Header.Magic = MI.Is64Bit ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC; + Obj->Header.CPUType = MI.MachOCPUType; + Obj->Header.CPUSubType = MI.MachOCPUSubType; + Obj->Header.FileType = MachO::MH_OBJECT; + Obj->Header.NCmds = 1; + Obj->Header.Flags = 0; + Obj->Header.SizeOfCmds = SizeOfCmds; + + LoadCommand &LC = Obj->addSegment(""); + MachO::segment_command_64 &Seg = LC.MachOLoadCommand.segment_command_64_data; + Seg.vmsize = SectionContent->getBufferSize(); + Seg.fileoff = SegOffset; + Seg.filesize = SectionContent->getBufferSize(); + Seg.maxprot = + MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE; + Seg.initprot = + MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE; + + Section Sec("__DATA", "__data"); + Sec.Size = SectionContent->getBufferSize(); + Sec.Offset = SegOffset; + memcpy(SectionContent->getBufferStart(), Input.getBufferStart(), + Input.getBufferSize()); + Sec.setOwnedContentData(ArrayRef( + reinterpret_cast(SectionContent->getBufferStart()), + SectionContent->getBufferSize())); + LC.Sections.push_back(Sec); + + LoadCommand DySymTab; + MachO::dysymtab_command &DySymTabData = + DySymTab.MachOLoadCommand.dysymtab_command_data; + memset(&DySymTabData, 0, sizeof(DySymTabData)); + DySymTabData.cmd = MachO::LC_DYSYMTAB; + DySymTabData.cmdsize = sizeof(MachO::dysymtab_command); + + LoadCommand SymTab; + MachO::symtab_command &SymTabData = + SymTab.MachOLoadCommand.symtab_command_data; + memset(&SymTabData, 0, sizeof(SymTabData)); + SymTabData.cmd = MachO::LC_SYMTAB; + SymTabData.cmdsize = sizeof(MachO::symtab_command); + + Obj->LoadCommands.push_back(DySymTab); + Obj->DySymTabCommandIndex = Obj->LoadCommands.size() - 1; + 
Obj->LoadCommands.push_back(SymTab); + Obj->SymTabCommandIndex = Obj->LoadCommands.size() - 1; + + std::string SanitizedFilename = Input.getBufferIdentifier().str(); + // Replace non-alphanumeric bytes; unsigned char keeps isalnum() defined. + std::replace_if( + std::begin(SanitizedFilename), std::end(SanitizedFilename), + [](unsigned char C) { return !isalnum(C); }, '_'); + const std::string Prefix = "__binary_" + SanitizedFilename; + + Obj->SymTable.addSymbol(Prefix + "_size", MachO::N_ABS | MachO::N_EXT, 0, + MachO::REFERENCE_FLAG_DEFINED, Input.getBufferSize()); + Obj->SymTable.addSymbol(Prefix + "_start", MachO::N_SECT | MachO::N_EXT, 1, + MachO::REFERENCE_FLAG_DEFINED, 0); + Obj->SymTable.addSymbol(Prefix + "_end", MachO::N_SECT | MachO::N_EXT, 1, + MachO::REFERENCE_FLAG_DEFINED, + SectionContent->getBufferSize()); + + return Obj; +} + } // end namespace macho } // end namespace objcopy } // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -173,29 +173,35 @@ } #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ - case MachO::LCName: \ + case MachO::LCName: { \ assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \ MLC.load_command_data.cmdsize); \ if (IsLittleEndian != sys::IsLittleEndianHost) \ MachO::swapStruct(MLC.LCStruct##_data); \ memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \ Begin += sizeof(MachO::LCStruct); \ - memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \ + /* Call memcpy only if it's not empty due to a nonnull attribute of */ \ + /* memcpy's 2nd argument. */ \ + if (LC.Payload.size() > 0) \ + memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \ Begin += LC.Payload.size(); \ - break; + break; \ + } // Copy the load command as it is. 
switch (MLC.load_command_data.cmd) { - default: + default: { assert(sizeof(MachO::load_command) + LC.Payload.size() == MLC.load_command_data.cmdsize); if (IsLittleEndian != sys::IsLittleEndianHost) MachO::swapStruct(MLC.load_command_data); memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command)); Begin += sizeof(MachO::load_command); - memcpy(Begin, LC.Payload.data(), LC.Payload.size()); + if (LC.Payload.size() > 0) + memcpy(Begin, LC.Payload.data(), LC.Payload.size()); Begin += LC.Payload.size(); break; + } #include "llvm/BinaryFormat/MachO.def" } } diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h --- a/llvm/tools/llvm-objcopy/MachO/Object.h +++ b/llvm/tools/llvm-objcopy/MachO/Object.h @@ -123,6 +123,10 @@ uint16_t n_desc; uint64_t n_value; + SymbolEntry(std::string Name, uint8_t Type, uint8_t Sect, uint16_t Desc, + uint64_t Value) + : Name(Name), n_type(Type), n_sect(Sect), n_desc(Desc), n_value(Value) {} + bool isExternalSymbol() const { return n_type & ((MachO::N_EXT | MachO::N_PEXT)); } @@ -143,6 +147,9 @@ SymbolEntry *getSymbolByIndex(uint32_t Index); void removeSymbols( function_ref &)> ToRemove); + /// Appends a symbol; Name is copied via Twine::str() during the call. + void addSymbol(const Twine &Name, uint8_t Type, uint8_t Sect, uint16_t Desc, + uint64_t Value); }; struct IndirectSymbolEntry { diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp --- a/llvm/tools/llvm-objcopy/MachO/Object.cpp +++ b/llvm/tools/llvm-objcopy/MachO/Object.cpp @@ -25,6 +25,12 @@ std::end(Symbols)); } +void SymbolTable::addSymbol(const Twine &Name, uint8_t Type, uint8_t Sect, + uint16_t Desc, uint64_t Value) { + Symbols.push_back( + std::make_unique(Name.str(), Type, Sect, Desc, Value)); +} + void Object::removeSections(function_ref ToRemove) { for (LoadCommand &LC : LoadCommands) LC.Sections.erase(std::remove_if(std::begin(LC.Sections), diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp --- 
a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp @@ -141,6 +141,8 @@ static Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In, Buffer &Out) { switch (Config.OutputFormat) { + case FileFormat::MachO: + return macho::executeObjcopyOnRawBinary(Config, In, Out); case FileFormat::ELF: // FIXME: Currently, we call elf::executeObjcopyOnRawBinary even if the // output format is binary/ihex or it's not given. This behavior differs from