Index: llvm/include/llvm/Object/MachO.h =================================================================== --- llvm/include/llvm/Object/MachO.h +++ llvm/include/llvm/Object/MachO.h @@ -311,6 +311,9 @@ bool isSectionBitcode(DataRefImpl Sec) const override; bool isDebugSection(DataRefImpl Sec) const override; + /// Return the raw contents of an entire segment. + ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const; + /// When dsymutil generates the companion file, it strips all unnecessary /// sections (e.g. everything in the _TEXT segment) by omitting their body /// and setting the offset in their corresponding load command to zero. Index: llvm/include/llvm/ObjectYAML/MachOYAML.h =================================================================== --- llvm/include/llvm/ObjectYAML/MachOYAML.h +++ llvm/include/llvm/ObjectYAML/MachOYAML.h @@ -131,6 +131,7 @@ std::vector<LoadCommand> LoadCommands; std::vector<Section>
Sections; LinkEditData LinkEdit; + Optional<llvm::yaml::BinaryRef> RawLinkEditSegment; DWARFYAML::Data DWARF; }; Index: llvm/lib/Object/MachOObjectFile.cpp =================================================================== --- llvm/lib/Object/MachOObjectFile.cpp +++ llvm/lib/Object/MachOObjectFile.cpp @@ -2048,6 +2048,43 @@ SectionName == "__swift_ast"; } +template <typename LoadCommandType> +ArrayRef<uint8_t> getSegmentContents(const MachOObjectFile &Obj, + MachOObjectFile::LoadCommandInfo LoadCmd, + StringRef SegmentName) { + auto SegmentOrErr = getStructOrErr<LoadCommandType>(Obj, LoadCmd.Ptr); + if (!SegmentOrErr) { + consumeError(SegmentOrErr.takeError()); + return {}; + } + auto &Segment = SegmentOrErr.get(); + if (StringRef(Segment.segname, 16).startswith(SegmentName)) + return arrayRefFromStringRef(Obj.getData().slice( + Segment.fileoff, Segment.fileoff + Segment.filesize)); + return {}; +} + +ArrayRef<uint8_t> MachOObjectFile::getSegmentContents(StringRef SegmentName) const { + for (auto LoadCmd : load_commands()) { + ArrayRef<uint8_t> Contents; + switch (LoadCmd.C.cmd) { + case MachO::LC_SEGMENT: + Contents = ::getSegmentContents<MachO::segment_command>(*this, LoadCmd, + SegmentName); + break; + case MachO::LC_SEGMENT_64: + Contents = ::getSegmentContents<MachO::segment_command_64>(*this, LoadCmd, + SegmentName); + break; + default: + continue; + } + if (!Contents.empty()) + return Contents; + } + return {}; +} + unsigned MachOObjectFile::getSectionID(SectionRef Sec) const { return Sec.getRawDataRefImpl().d.a; } Index: llvm/lib/ObjectYAML/MachOEmitter.cpp =================================================================== --- llvm/lib/ObjectYAML/MachOEmitter.cpp +++ llvm/lib/ObjectYAML/MachOEmitter.cpp @@ -288,6 +288,7 @@ } Error MachOWriter::writeSectionData(raw_ostream &OS) { + uint64_t LinkEditOff = 0; for (auto &LC : Obj.LoadCommands) { switch (LC.Data.load_command_data.cmd) { case MachO::LC_SEGMENT: @@ -297,6 +298,9 @@ if (0 == strncmp(&LC.Data.segment_command_data.segname[0], "__LINKEDIT", 16)) { FoundLinkEditSeg = true; + LinkEditOff = segOff; + if (Obj.RawLinkEditSegment) + continue;
writeLinkEditData(OS); } for (auto &Sec : LC.Sections) { @@ -344,6 +348,13 @@ } } + if (Obj.RawLinkEditSegment) { + ZeroToOffset(OS, LinkEditOff); + if (OS.tell() - fileStart > LinkEditOff || !LinkEditOff) + return createStringError(errc::invalid_argument, + "section offsets don't line up"); + Obj.RawLinkEditSegment->writeAsBinary(OS); + } return Error::success(); } Index: llvm/lib/ObjectYAML/MachOYAML.cpp =================================================================== --- llvm/lib/ObjectYAML/MachOYAML.cpp +++ llvm/lib/ObjectYAML/MachOYAML.cpp @@ -110,9 +110,12 @@ Object.DWARF.Is64BitAddrSize = Object.Header.magic == MachO::MH_MAGIC_64 || Object.Header.magic == MachO::MH_CIGAM_64; IO.mapOptional("LoadCommands", Object.LoadCommands); + + if(Object.RawLinkEditSegment || !IO.outputting()) + IO.mapOptional("__LINKEDIT", Object.RawLinkEditSegment); if(!Object.LinkEdit.isEmpty() || !IO.outputting()) IO.mapOptional("LinkEditData", Object.LinkEdit); - + if(!Object.DWARF.isEmpty() || !IO.outputting()) IO.mapOptional("DWARF", Object.DWARF); Index: llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml =================================================================== --- /dev/null +++ llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml @@ -0,0 +1,184 @@ +# Test that obj2yaml + yaml2obj can round-trip mach-o executables with +# raw __LINKEDIT segments. 
+# +# RUN: yaml2obj %s | obj2yaml --raw-linkedit-segment --raw-data-segment | FileCheck %s +# +# This file was produced using: +# echo "int ext;" > a.c +# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o a.o a.c -c +# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib a.o -o liba.dylib -install_name @executable_path/liba.dylib +# echo "extern int ext;" > b.c +# echo "int padding;" >> b.c +# echo "int *p = &ext + 4;" >> b.c +# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o b.o b.c -c +# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib b.o -o libfixups.dylib -install_name @executable_path/libfixups.dylib -L. -la +# +# CHECK: - sectname: __data +# CHECK: segname: __DATA +# CHECK: content: '0000001000000080' +# CHECK: __LINKEDIT: 0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000 + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0x6 + ncmds: 16 + sizeofcmds: 816 + flags: 0x100085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 0 + vmsize: 16384 + fileoff: 0 + filesize: 16384 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x4000 + size: 0 + offset: 0x4000 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '' + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DATA + vmaddr: 16384 + vmsize: 16384 + fileoff: 16384 + filesize: 16384 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 0 + Sections: + - sectname: __data + segname: __DATA + addr: 0x4000 + size: 8 + offset: 0x4000 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + 
reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '0000001000000080' + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 32768 + vmsize: 16384 + fileoff: 32768 + filesize: 160 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_ID_DYLIB + cmdsize: 64 + dylib: + name: 24 + timestamp: 1 + current_version: 0 + compatibility_version: 0 + Content: '@executable_path/libfixups.dylib' + ZeroPadBytes: 8 + - cmd: LC_DYLD_CHAINED_FIXUPS + cmdsize: 16 + dataoff: 32768 + datasize: 88 + - cmd: LC_DYLD_EXPORTS_TRIE + cmdsize: 16 + dataoff: 32856 + datasize: 16 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 32880 + nsyms: 2 + stroff: 32912 + strsize: 16 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 1 + iundefsym: 1 + nundefsym: 1 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_UUID + cmdsize: 24 + uuid: 56F7BCE0-C1A7-38E3-A90D-742D8E3D5FA9 + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 2 + minos: 983296 + sdk: 983552 + ntools: 1 + Tools: + - tool: 3 + version: 46596096 + - cmd: LC_SOURCE_VERSION + cmdsize: 16 + version: 0 + - cmd: LC_ENCRYPTION_INFO_64 + cmdsize: 24 + cryptoff: 16384 + cryptsize: 0 + cryptid: 0 + pad: 0 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 0 + compatibility_version: 0 + Content: '@executable_path/liba.dylib' + ZeroPadBytes: 5 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 85917696 + compatibility_version: 65536 + Content: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 32872 + datasize: 8 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 32880 + datasize: 0 +__LINKEDIT: 
0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000 +... Index: llvm/tools/obj2yaml/macho2yaml.cpp =================================================================== --- llvm/tools/obj2yaml/macho2yaml.cpp +++ llvm/tools/obj2yaml/macho2yaml.cpp @@ -29,6 +29,8 @@ const object::MachOObjectFile &Obj; std::unique_ptr<DWARFContext> DWARFCtx; + bool RawDataSegment; + bool RawLinkEditSegment; void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y); Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y); void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y); @@ -52,8 +54,10 @@ public: MachODumper(const object::MachOObjectFile &O, - std::unique_ptr<DWARFContext> DCtx) - : Obj(O), DWARFCtx(std::move(DCtx)) {} + std::unique_ptr<DWARFContext> DCtx, bool RawDataSegment, + bool RawLinkEditSegment) + : Obj(O), DWARFCtx(std::move(DCtx)), RawDataSegment(RawDataSegment), + RawLinkEditSegment(RawLinkEditSegment) {} Expected<std::unique_ptr<MachOYAML::Object>> dump(); }; @@ -176,6 +180,11 @@ if (Expected<MachOYAML::Section> S = constructSection(Sec, Sections.size() + 1)) { StringRef SecName(S->sectname); + + // Copy data sections if requested. + if (RawDataSegment && StringRef(S->segname).startswith("__DATA")) + S->content = yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size)); + if (SecName.startswith("__debug_")) { // If the DWARF section cannot be successfully parsed, emit raw content // instead of an entry in the DWARF section of the YAML.
@@ -282,7 +291,11 @@ dumpHeader(Y); if (Error Err = dumpLoadCommands(Y)) return std::move(Err); - dumpLinkEdit(Y); + if (RawLinkEditSegment) + Y->RawLinkEditSegment = + yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT")); + else + dumpLinkEdit(Y); return std::move(Y); } @@ -587,9 +600,10 @@ } } -Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) { +Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj, + bool RawDataSegment, bool RawLinkEditSegment) { std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj); - MachODumper Dumper(Obj, std::move(DCtx)); + MachODumper Dumper(Obj, std::move(DCtx), RawDataSegment, RawLinkEditSegment); Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump(); if (!YAML) return YAML.takeError(); @@ -602,7 +616,8 @@ return Error::success(); } -Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) { +Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj, + bool RawDataSegment, bool RawLinkEditSegment) { yaml::YamlObjectFile YAMLFile; YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary()); MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO; @@ -624,7 +639,8 @@ return SliceObj.takeError(); std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get()); - MachODumper Dumper(*SliceObj.get(), std::move(DCtx)); + MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawDataSegment, + RawLinkEditSegment); Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump(); if (!YAMLObj) return YAMLObj.takeError(); @@ -636,12 +652,13 @@ return Error::success(); } -Error macho2yaml(raw_ostream &Out, const object::Binary &Binary) { +Error macho2yaml(raw_ostream &Out, const object::Binary &Binary, + bool RawDataSegment, bool RawLinkEditSegment) { if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary)) - return macho2yaml(Out, *MachOObj); + return macho2yaml(Out, *MachOObj, RawDataSegment, RawLinkEditSegment); if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary)) - return macho2yaml(Out, *MachOObj); + return macho2yaml(Out, *MachOObj, RawDataSegment,
RawLinkEditSegment); llvm_unreachable("unexpected Mach-O file format"); } Index: llvm/tools/obj2yaml/obj2yaml.h =================================================================== --- llvm/tools/obj2yaml/obj2yaml.h +++ llvm/tools/obj2yaml/obj2yaml.h @@ -24,8 +24,8 @@ const llvm::object::COFFObjectFile &Obj); llvm::Error elf2yaml(llvm::raw_ostream &Out, const llvm::object::ObjectFile &Obj); -llvm::Error macho2yaml(llvm::raw_ostream &Out, - const llvm::object::Binary &Obj); +llvm::Error macho2yaml(llvm::raw_ostream &Out, const llvm::object::Binary &Obj, + bool RawDataSegment, bool RawLinkeditSegment); llvm::Error minidump2yaml(llvm::raw_ostream &Out, const llvm::object::MinidumpFile &Obj); std::error_code xcoff2yaml(llvm::raw_ostream &Out, Index: llvm/tools/obj2yaml/obj2yaml.cpp =================================================================== --- llvm/tools/obj2yaml/obj2yaml.cpp +++ llvm/tools/obj2yaml/obj2yaml.cpp @@ -18,6 +18,19 @@ using namespace llvm; using namespace llvm::object; +static cl::opt<std::string> + InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-")); +static cl::opt<bool> + RawDataSegment("raw-data-segment", + cl::desc("Mach-O: dump the raw contents of the __DATA " + "segment instead of parsing it"), + cl::init(false)); +static cl::opt<bool> RawLinkeditSegment( + "raw-linkedit-segment", + cl::desc("Mach-O: dump the raw contents of the __LINKEDIT " + "segment instead of parsing it"), + cl::init(false)); + static Error dumpObject(const ObjectFile &Obj) { if (Obj.isCOFF()) return errorCodeToError(coff2yaml(outs(), cast<COFFObjectFile>(Obj))); @@ -54,7 +67,7 @@ // Universal MachO is not a subclass of ObjectFile, so it needs to be handled // here with the other binary types.
if (Binary.isMachO() || Binary.isMachOUniversalBinary()) - return macho2yaml(outs(), Binary); + return macho2yaml(outs(), Binary, RawDataSegment, RawLinkeditSegment); if (ObjectFile *Obj = dyn_cast<ObjectFile>(&Binary)) return dumpObject(*Obj); if (MinidumpFile *Minidump = dyn_cast<MinidumpFile>(&Binary)) @@ -74,9 +87,6 @@ errs().flush(); } -cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"), - cl::init("-")); - int main(int argc, char *argv[]) { InitLLVM X(argc, argv); cl::ParseCommandLineOptions(argc, argv);