diff --git a/llvm/test/tools/llvm-objcopy/MachO/remove-section-dead-symbols.test b/llvm/test/tools/llvm-objcopy/MachO/remove-section-dead-symbols.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/MachO/remove-section-dead-symbols.test @@ -0,0 +1,128 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objcopy --remove-section __DATA,C %t %t.copy + +# RUN: llvm-readobj --symbols %t.copy | FileCheck %s + +# CHECK: Symbols [ +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _A (1) +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __data (0x2) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: ] + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 4 + sizeofcmds: 432 + flags: 0x00002000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 312 + segname: '' + vmaddr: 0 + vmsize: 8 + fileoff: 464 + filesize: 8 + maxprot: 7 + initprot: 7 + nsects: 3 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 0 + offset: 0x000001D0 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '' + - sectname: __data + segname: __DATA + addr: 0x0000000000000000 + size: 4 + offset: 0x000001D0 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '01000000' + - sectname: C + segname: __DATA + addr: 0x0000000000000004 + size: 4 + offset: 0x000001D4 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '02000000' + - cmd: LC_VERSION_MIN_MACOSX + cmdsize: 16 + version: 658944 + sdk: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 472 + nsyms: 2 + stroff: 504 + strsize: 8 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 2 + iundefsym: 2 + nundefsym: 0 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 +LinkEditData: + NameList: + - n_strx: 4 + n_type: 0x0F + n_sect: 2 + n_desc: 0 + n_value: 0 + - n_strx: 1 + n_type: 0x0F + n_sect: 3 + n_desc: 0 + n_value: 4 + StringTable: + - '' + - _B + - _A + - '' +... diff --git a/llvm/test/tools/llvm-objcopy/MachO/remove-section-error.test b/llvm/test/tools/llvm-objcopy/MachO/remove-section-error.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/MachO/remove-section-error.test @@ -0,0 +1,120 @@ +# RUN: yaml2obj %s -o %t +# RUN: not llvm-objcopy --remove-section __DATA,C %t /dev/null 2>&1 | FileCheck %s + +# CHECK: symbol '_a' defined in section with index '2' cannot be removed because it is referenced by a relocation in section '__TEXT,__text' + +## The binary used in this test was built as follows: +## main.c: +## __attribute__((section("__DATA,C"))) int a = 2; +## int f() { return a; } +## build command: +## clang -fno-exceptions -fno-unwind-tables -c main.c -o main.o + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 4 + sizeofcmds: 360 + flags: 0x00002000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 232 + segname: '' + vmaddr: 0 + vmsize: 16 + fileoff: 392 + filesize: 16 + maxprot: 7 + initprot: 7 + nsects: 2 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 12 + offset: 0x00000188 + align: 4 + reloff: 0x00000198 + nreloc: 1 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 554889E58B05000000005DC3 + relocations: + - address: 0x00000006 + symbolnum: 0 + pcrel: true + length: 2 + extern: true + type: 1 + scattered: false + value: 0 + - sectname: C + segname: __DATA + addr: 0x000000000000000C + size: 4 + offset: 0x00000194 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '02000000' + - cmd: LC_BUILD_VERSION + cmdsize: 24 + platform: 1 + minos: 658944 + sdk: 658944 + ntools: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 416 + nsyms: 2 + stroff: 448 + strsize: 8 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 2 + iundefsym: 2 + nundefsym: 0 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 +LinkEditData: + NameList: + - n_strx: 4 + n_type: 0x0F + n_sect: 2 + n_desc: 0 + n_value: 12 + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 0 + StringTable: + - '' + - _f + - _a + - '' +... diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp --- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp @@ -20,7 +20,7 @@ using namespace object; using SectionPred = std::function &Sec)>; -static void removeSections(const CopyConfig &Config, Object &Obj) { +static Error removeSections(const CopyConfig &Config, Object &Obj) { SectionPred RemovePred = [](const std::unique_ptr
&) { return false; }; if (!Config.ToRemove.empty()) { @@ -180,7 +180,9 @@ return createStringError(llvm::errc::invalid_argument, "option not supported by llvm-objcopy for MachO"); } - removeSections(Config, Obj); + + if (Error E = removeSections(Config, Obj)) + return E; // Mark symbols to determine which symbols are still needed. if (Config.StripAll) diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -28,10 +28,11 @@ } template -Section constructSectionCommon(SectionType Sec) { +Section constructSectionCommon(SectionType Sec, uint32_t Index) { StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); Section S(SegName, SectName); + S.Index = Index; S.Addr = Sec.addr; S.Size = Sec.size; S.Offset = Sec.offset; @@ -45,14 +46,15 @@ return S; } -template Section constructSection(SectionType Sec); +template +Section constructSection(SectionType Sec, uint32_t Index); -template <> Section constructSection(MachO::section Sec) { - return constructSectionCommon(Sec); +template <> Section constructSection(MachO::section Sec, uint32_t Index) { + return constructSectionCommon(Sec, Index); } -template <> Section constructSection(MachO::section_64 Sec) { - Section S = constructSectionCommon(Sec); +template <> Section constructSection(MachO::section_64 Sec, uint32_t Index) { + Section S = constructSectionCommon(Sec, Index); S.Reserved3 = Sec.reserved3; return S; } @@ -62,7 +64,7 @@ std::vector> extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd, const object::MachOObjectFile &MachOObj, - size_t &NextSectionIndex) { + uint32_t &NextSectionIndex) { auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; const SectionType *Curr = reinterpret_cast(LoadCmd.Ptr + sizeof(SegmentType)); @@ -72,9 +74,11 @@ SectionType Sec; memcpy((void *)&Sec, Curr, sizeof(SectionType)); MachO::swapStruct(Sec); - Sections.push_back(std::make_unique
(constructSection(Sec))); + Sections.push_back( + std::make_unique
(constructSection(Sec, NextSectionIndex))); } else { - Sections.push_back(std::make_unique
(constructSection(*Curr))); + Sections.push_back( + std::make_unique
(constructSection(*Curr, NextSectionIndex))); } Section &S = *Sections.back(); @@ -110,7 +114,7 @@ void MachOReader::readLoadCommands(Object &O) const { // For MachO sections indices start from 1. - size_t NextSectionIndex = 1; + uint32_t NextSectionIndex = 1; for (auto LoadCmd : MachOObj.load_commands()) { LoadCommand LC; switch (LoadCmd.C.cmd) { @@ -189,12 +193,10 @@ for (auto Symbol : MachOObj.symbols()) { SymbolEntry SE = (MachOObj.is64Bit() - ? constructSymbolEntry( - StrTable, - MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl())) - : constructSymbolEntry( - StrTable, - MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl()))); + ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry( + Symbol.getRawDataRefImpl())) + : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry( + Symbol.getRawDataRefImpl()))); O.SymTable.Symbols.push_back(std::make_unique(SE)); } diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h --- a/llvm/tools/llvm-objcopy/MachO/Object.h +++ b/llvm/tools/llvm-objcopy/MachO/Object.h @@ -37,6 +37,7 @@ struct RelocationInfo; struct Section { + uint32_t Index; std::string Segname; std::string Sectname; // CanonicalName is a string formatted as “,". @@ -83,7 +84,7 @@ // The raw content of the payload of the load command (located right after the // corresponding struct). In some cases it is either empty or can be // copied-over without digging into its structure. - std::vector Payload; + std::vector Payload; // Some load commands can contain (inside the payload) an array of sections, // though the contents of the sections are stored separately. The struct @@ -115,6 +116,10 @@ bool isUndefinedSymbol() const { return (n_type & MachO::N_TYPE) == MachO::N_UNDF; } + + Optional section() const { + return n_sect == MachO::NO_SECT ? None : Optional(n_sect); + } }; /// The location of the symbol table inside the binary is described by LC_SYMTAB @@ -306,7 +311,8 @@ Object() : NewSectionsContents(Alloc) {} - void removeSections(function_ref &)> ToRemove); + Error + removeSections(function_ref &)> ToRemove); void addLoadCommand(LoadCommand LC); /// Creates a new segment load command in the object and returns a reference diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp --- a/llvm/tools/llvm-objcopy/MachO/Object.cpp +++ b/llvm/tools/llvm-objcopy/MachO/Object.cpp @@ -1,5 +1,7 @@ #include "Object.h" #include "../llvm-objcopy.h" +#include "llvm/ADT/SmallPtrSet.h" +#include namespace llvm { namespace objcopy { @@ -22,11 +24,41 @@ std::end(Symbols)); } -void Object::removeSections(function_ref &)> ToRemove) { - for (LoadCommand &LC : LoadCommands) - LC.Sections.erase(std::remove_if(std::begin(LC.Sections), - std::end(LC.Sections), ToRemove), - std::end(LC.Sections)); +Error Object::removeSections( + function_ref &)> ToRemove) { + std::unordered_set RemovedSectionsIndices; + for (LoadCommand &LC : LoadCommands) { + auto It = std::stable_partition( + std::begin(LC.Sections), std::end(LC.Sections), + [&](const std::unique_ptr
&Sec) { return !ToRemove(Sec); }); + for (auto I = It, End = LC.Sections.end(); I != End; ++I) + RemovedSectionsIndices.insert((*I)->Index); + LC.Sections.erase(It, LC.Sections.end()); + } + + auto IsDead = [&](const std::unique_ptr &S) -> bool { + Optional Section = S->section(); + return (Section && RemovedSectionsIndices.count(*Section)); + }; + + SmallPtrSet DeadSymbols; + for (const std::unique_ptr &Sym : SymTable.Symbols) + if (IsDead(Sym)) + DeadSymbols.insert(Sym.get()); + + for (const LoadCommand &LC : LoadCommands) + for (const std::unique_ptr
&Sec : LC.Sections) + for (const RelocationInfo &R : Sec->Relocations) + if (R.Symbol && DeadSymbols.count(R.Symbol)) + return createStringError(std::errc::invalid_argument, + "symbol '%s' defined in section with index " + "'%u' cannot be removed because it is " + "referenced by a relocation in section '%s'", + R.Symbol->Name.c_str(), + *(R.Symbol->section()), + Sec->CanonicalName.c_str()); + SymTable.removeSymbols(IsDead); + return Error::success(); } void Object::addLoadCommand(LoadCommand LC) {