diff --git a/llvm/docs/CommandGuide/llvm-objcopy.rst b/llvm/docs/CommandGuide/llvm-objcopy.rst --- a/llvm/docs/CommandGuide/llvm-objcopy.rst +++ b/llvm/docs/CommandGuide/llvm-objcopy.rst @@ -67,6 +67,15 @@ Print a summary of command line options. +.. option:: --only-keep-debug + + Produce a debug file as the output that only preserves contents of sections + useful for debugging purposes. + + For ELF objects, this removes the contents of `SHF_ALLOC` sections that are not + `SHT_NOTE` by making them `SHT_NOBITS` and shrinking the program headers where + possible. + .. option:: --only-section <section>, -j Remove all sections from the output, except for sections named ``<section>``. @@ -177,11 +186,6 @@ objects, :program:`llvm-objcopy` will either emit an error or silently ignore them. -.. option:: --only-keep-debug - - Remove the contents of non-debug sections from the output, but keep the section - headers. - ELF-SPECIFIC OPTIONS -------------------- diff --git a/llvm/test/tools/llvm-objcopy/ELF/basic-only-keep-debug.test b/llvm/test/tools/llvm-objcopy/ELF/basic-only-keep-debug.test deleted file mode 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/basic-only-keep-debug.test +++ /dev/null @@ -1,22 +0,0 @@ -# NOTE: This test is only intended to be valid as long as --only-keep-debug is -# implemented as a NOP. This test should fail when that changes and you -# will need to update this test. - -# RUN: yaml2obj %s > %t -# RUN: llvm-objcopy %t %t2 -# RUN: llvm-objcopy --only-keep-debug %t %t3 -# RUN: cmp %t2 %t3 -# RUN: llvm-strip --only-keep-debug --no-strip-all %t -o %t4 -# RUN: cmp %t2 %t4 - -!ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS - Flags: [ SHF_ALLOC, SHF_EXECINSTR ] - Content: "DEADBEEF" diff --git a/llvm/test/tools/llvm-objcopy/ELF/only-keep-debug.test b/llvm/test/tools/llvm-objcopy/ELF/only-keep-debug.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/ELF/only-keep-debug.test @@ -0,0 +1,224 @@ +# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: llvm-objcopy --only-keep-debug %t1 %t1.dbg +# RUN: llvm-readelf -S -l -x .note1 -x .note2 -x .debug_abbrev -x .debug_frame -x .debug_info %t1.dbg | FileCheck %s + +## Check that SHT_NOTE and .debug* are kept, but others are changed to SHT_NOBITS. +## SHT_NOBITS sections do not occupy space in the output. + +# CHECK: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# CHECK: [ 1] .note1 NOTE 0000000000000400 000400 000001 00 A 0 0 1024 +# CHECK-NEXT: [ 2] .note2 NOTE 0000000000000401 000401 000001 00 A 0 0 0 +# CHECK-NEXT: [ 3] .text NOBITS 0000000000000402 000402 000001 00 AX 0 0 0 +# CHECK-NEXT: [ 4] .tdata NOBITS 0000000000001480 000480 000007 00 WAT 0 0 128 +# CHECK-NEXT: [ 5] .tbss NOBITS 0000000000001487 000480 000005 00 WAT 0 0 0 +# CHECK-NEXT: [ 6] .bss NOBITS 00000000000014a0 000480 00003f 00 WA 0 0 32 +## objcopy sets sh_offset to 0x402. We don't do this to keep sh_offset non-decreasing. +# CHECK-NEXT: [ 7] .debug_abbrev PROGBITS 0000000000000000 000480 000001 00 0 0 0 +# CHECK-NEXT: [ 8] .debug_frame PROGBITS 0000000000000000 000488 000001 00 0 0 8 +# CHECK-NEXT: [ 9] .debug_info PROGBITS 0000000000000000 000489 000001 00 0 0 0 +# CHECK-NEXT: [10] .strtab STRTAB 0000000000000000 00048a 000001 00 0 0 1 +# CHECK-NEXT: [11] .shstrtab STRTAB 0000000000000000 00048b 000060 00 0 0 1 + +# CHECK: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK-NEXT: LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x000402 0x000403 R E 0x1000 +# CHECK-NEXT: LOAD 0x000480 0x0000000000001480 0x0000000000000000 0x000000 0x00005f RW 0x1000 +# CHECK-NEXT: TLS 0x000480 0x0000000000001480 0x0000000000000000 0x000000 0x00000c RW 0x80 +# CHECK-NEXT: NOTE 0x000400 0x0000000000000400 0x0000000000000000 0x000002 0x000002 0x400 + +## Contents of SHT_NOTE and .debug* are kept. + +# CHECK: Hex dump of section '.note1': +# CHECK-NEXT: 0x00000400 01 +# CHECK: Hex dump of section '.note2': +# CHECK-NEXT: 0x00000401 02 +# CHECK: Hex dump of section '.debug_abbrev': +# CHECK-NEXT: 0x00000000 03 +# CHECK: Hex dump of section '.debug_frame': +# CHECK-NEXT: 0x00000000 04 +# CHECK: Hex dump of section '.debug_info': +# CHECK-NEXT: 0x00000000 05 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .note1 + Type: SHT_NOTE + Flags: [ SHF_ALLOC ] + Address: 0x400 + AddressAlign: 0x400 + Content: 01 + - Name: .note2 + Type: SHT_NOTE + Flags: [ SHF_ALLOC ] + Address: 0x401 + Content: 02 + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x402 + Content: c3 + - Name: .tdata + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_WRITE, SHF_TLS ] + Address: 0x1480 # Ensure Address=0x1000+Offset + AddressAlign: 0x80 + # An arbitrary non-zero Size tests that .tdata does not occupy space + # and we can rewrite p_filesz of PT_TLS. + Size: 7 + - Name: .tbss + Type: SHT_NOBITS + Flags: [ SHF_ALLOC, SHF_WRITE, SHF_TLS ] + Address: 0x1487 # Ensure Address=0x1000+Offset + Size: 5 + - Name: .bss + Type: SHT_NOBITS + Flags: [ SHF_ALLOC, SHF_WRITE ] + Address: 0x14a0 # Ensure Address=0x1000+Offset + AddressAlign: 0x20 + # An arbitrary non-zero Size tests that .bss does not occupy space. + Size: 63 + - Name: .debug_abbrev + Type: SHT_PROGBITS + Content: 03 + - Name: .debug_frame + Type: SHT_PROGBITS + # AddressAlign tests the file offset assignment leaves a gap. + AddressAlign: 0x8 + Content: 04 + - Name: .debug_info + Type: SHT_PROGBITS + Content: 05 +ProgramHeaders: + - Type: PT_LOAD + Flags: [ PF_R, PF_X ] + Offset: 0 + Align: 0x1000 + Sections: + - Section: .note1 + - Section: .note2 + - Section: .text + - Type: PT_LOAD + Flags: [ PF_R, PF_W ] + VAddr: 0x1480 # Ensure Offset=VAddr (mod Align) if Offset changes + Align: 0x1000 + Sections: + - Section: .tdata + - Section: .bss + - Type: PT_TLS + Flags: [ PF_R, PF_W ] + VAddr: 0x1480 # Ensure Offset=VAddr (mod Align) if Offset changes + Sections: + - Section: .tdata + - Section: .tbss + - Type: PT_NOTE + VAddr: 0x400 + Sections: + - Section: .note1 + - Section: .note2 +... + +# RUN: yaml2obj --docnum=2 %s -o %t2 +# RUN: llvm-objcopy --only-keep-debug %t2 %t2.dbg +# RUN: llvm-readelf -S -l %t2.dbg | FileCheck --check-prefix=CHECK2 %s + +## Only the tail of a segment can be trimmed. .text still occupies space because +## it is followed by .note which is not SHT_NOBITS. +# CHECK2: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# CHECK2: [ 1] .text NOBITS 0000000000000200 000200 000001 00 AX 0 0 512 +# CHECK2-NEXT: [ 2] .note NOTE 0000000000000201 000201 000001 00 A 0 0 0 +# CHECK2-NEXT: [ 3] .debug_info PROGBITS 0000000000000000 000220 000001 00 0 0 32 +# CHECK2-NEXT: [ 4] .strtab STRTAB 0000000000000000 000221 000001 00 0 0 1 +# CHECK2-NEXT: [ 5] .shstrtab STRTAB 0000000000000000 000222 00002b 00 0 0 1 + +## Check that p_offset or p_filesz of empty segments or PT_PHDR are not modified. +# CHECK2: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK2-NEXT: PHDR 0x000040 0x0000000000000040 0x0000000000000000 0x0000a8 0x0000a8 R 0x8 +# CHECK2-NEXT: LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x000202 0x000202 R E 0x1000 +# CHECK2-NEXT: LOAD 0x000202 0x0000000000000202 0x0000000000000000 0x00000e 0x00000e RW 0x1 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x200 + AddressAlign: 0x200 + Content: c3 + - Name: .note + Type: SHT_NOTE + Flags: [ SHF_ALLOC ] + Address: 0x201 + Content: 01 + - Name: .debug_info + Type: SHT_PROGBITS + AddressAlign: 0x20 + Content: 02 +ProgramHeaders: + - Type: PT_PHDR + Flags: [ PF_R ] + Offset: 0x40 + VAddr: 0x40 + # 3 * sizeof(Elf64_Phdr) = 0xa8 + FileSize: 0xa8 + MemSize: 0xa8 + Align: 8 + - Type: PT_LOAD + Flags: [ PF_R, PF_X ] + Offset: 0 + Align: 4096 + Sections: + - Section: .text + - Section: .note + - Type: PT_LOAD + Flags: [ PF_R, PF_W ] + Offset: 0x202 + VAddr: 0x202 + FileSize: 14 + MemSize: 14 +... + +## If .symtab or .strtab has the SHF_ALLOC flag, it will be changed to SHT_NOBITS. +# RUN: yaml2obj --docnum=3 %s -o %t3 +# RUN: llvm-objcopy --only-keep-debug %t3 %t3.dbg +# RUN: llvm-readelf -S -l %t3.dbg | FileCheck --check-prefix=CHECK3 %s + +# CHECK3: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# CHECK3: [ 1] .dynsym NOBITS 0000000000000000 000040 000018 18 A 2 1 1024 +# CHECK3-NEXT: [ 2] .dynstr NOBITS 0000000000000000 000040 000001 00 A 0 0 0 +# CHECK3-NEXT: [ 3] .symtab NOBITS 0000000000000000 000040 000018 18 A 4 1 0 +# CHECK3-NEXT: [ 4] .strtab NOBITS 0000000000000000 000040 000001 00 A 0 0 0 +# CHECK3-NEXT: [ 5] .shstrtab STRTAB 0000000000000000 000040 00002b 00 0 0 1 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .dynsym + Type: SHT_DYNSYM + Flags: [ SHF_ALLOC ] + Link: .dynstr + AddressAlign: 0x400 + - Name: .dynstr + Type: SHT_STRTAB + Flags: [ SHF_ALLOC ] + - Name: .symtab + Type: SHT_STRTAB + Flags: [ SHF_ALLOC ] + Link: .strtab + - Name: .strtab + Type: SHT_STRTAB + Flags: [ SHF_ALLOC ] +DynamicSymbols: [] +Symbols: [] diff --git a/llvm/tools/llvm-objcopy/CommonOpts.td b/llvm/tools/llvm-objcopy/CommonOpts.td --- a/llvm/tools/llvm-objcopy/CommonOpts.td +++ b/llvm/tools/llvm-objcopy/CommonOpts.td @@ -86,8 +86,9 @@ def only_keep_debug : Flag<["--"], "only-keep-debug">, - HelpText<"Clear sections that would not be stripped by --strip-debug. " - "Currently only implemented for COFF.">; + HelpText< + "Produce a debug file as the output that only preserves contents of " + "sections useful for debugging purposes">; def discard_locals : Flag<["--"], "discard-locals">, HelpText<"Remove compiler-generated local symbols, (e.g. " diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp --- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp @@ -136,17 +136,17 @@ // Depending on the initial ELFT and OutputFormat we need a different Writer. switch (OutputElfType) { case ELFT_ELF32LE: - return std::make_unique<ELFWriter<ELF32LE>>(Obj, Buf, - !Config.StripSections); + return std::make_unique<ELFWriter<ELF32LE>>(Obj, Buf, !Config.StripSections, + Config.OnlyKeepDebug); case ELFT_ELF64LE: - return std::make_unique<ELFWriter<ELF64LE>>(Obj, Buf, - !Config.StripSections); + return std::make_unique<ELFWriter<ELF64LE>>(Obj, Buf, !Config.StripSections, + Config.OnlyKeepDebug); case ELFT_ELF32BE: - return std::make_unique<ELFWriter<ELF32BE>>(Obj, Buf, - !Config.StripSections); + return std::make_unique<ELFWriter<ELF32BE>>(Obj, Buf, !Config.StripSections, + Config.OnlyKeepDebug); case ELFT_ELF64BE: - return std::make_unique<ELFWriter<ELF64BE>>(Obj, Buf, - !Config.StripSections); + return std::make_unique<ELFWriter<ELF64BE>>(Obj, Buf, !Config.StripSections, + Config.OnlyKeepDebug); } llvm_unreachable("Invalid output format"); } @@ -694,6 +694,11 @@ } } + if (Config.OnlyKeepDebug) + for (auto &Sec : Obj.sections()) + if (Sec.Flags & SHF_ALLOC && Sec.Type != SHT_NOTE) + Sec.Type = SHT_NOBITS; + for (const auto &Flag : Config.AddSection) { std::pair<StringRef, StringRef> SecPair = Flag.split("="); StringRef SecName = SecPair.first; diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/tools/llvm-objcopy/ELF/Object.h --- a/llvm/tools/llvm-objcopy/ELF/Object.h +++ b/llvm/tools/llvm-objcopy/ELF/Object.h @@ -342,9 +342,13 @@ virtual ~ELFWriter() {} bool WriteSectionHeaders; + // For --only-keep-debug, select an alternative section/segment layout + // algorithm. + bool OnlyKeepDebug; + Error finalize() override; Error write() override; - ELFWriter(Object &Obj, Buffer &Buf, bool WSH); + ELFWriter(Object &Obj, Buffer &Buf, bool WSH, bool OnlyKeepDebug); }; class BinaryWriter : public Writer { @@ -435,8 +439,6 @@ } }; - std::set<const SectionBase *, SectionCompare> Sections; - public: uint32_t Type; uint32_t Flags; @@ -451,6 +453,7 @@ uint64_t OriginalOffset; Segment *ParentSegment = nullptr; ArrayRef<uint8_t> Contents; + std::set<const SectionBase *, SectionCompare> Sections; explicit Segment(ArrayRef<uint8_t> Data) : Contents(Data) {} Segment() {} diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp --- a/llvm/tools/llvm-objcopy/ELF/Object.cpp +++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp @@ -1797,10 +1797,9 @@ template <class ELFT> void ELFWriter<ELFT>::writeSegmentData() { for (Segment &Seg : Obj.segments()) { - uint8_t *B = Buf.getBufferStart() + Seg.Offset; - assert(Seg.FileSize == Seg.getContents().size() && - "Segment size must match contents size"); - std::memcpy(B, Seg.getContents().data(), Seg.FileSize); + size_t Size = std::min<size_t>(Seg.FileSize, Seg.getContents().size()); + std::memcpy(Buf.getBufferStart() + Seg.Offset, Seg.getContents().data(), + Size); } // Iterate over removed sections and overwrite their old data with zeroes. @@ -1815,8 +1814,10 @@ } template <class ELFT> -ELFWriter<ELFT>::ELFWriter(Object &Obj, Buffer &Buf, bool WSH) - : Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs) {} +ELFWriter<ELFT>::ELFWriter(Object &Obj, Buffer &Buf, bool WSH, + bool OnlyKeepDebug) + : Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs), + OnlyKeepDebug(OnlyKeepDebug) {} Error Object::removeSections(bool AllowBrokenLinks, std::function<bool(const SectionBase &)> ToRemove) { @@ -1957,6 +1958,78 @@ return Offset; } +// Rewrite sh_offset after some sections are changed to SHT_NOBITS and thus +// occupy no space in the file. +static uint64_t layoutSectionsForOnlyKeepDebug(Object &Obj, uint64_t Off) { + uint32_t Index = 1; + for (auto &Sec : Obj.sections()) { + Sec.Index = Index++; + + auto *FirstSec = Sec.ParentSegment && Sec.ParentSegment->Type == PT_LOAD + ? Sec.ParentSegment->firstSection() + : nullptr; + + // The first section in a PT_LOAD has to have congruent offset and address + // modulo the alignment, which usually equals the maximum page size. + if (FirstSec && FirstSec == &Sec) + Off = alignTo(Off, Sec.ParentSegment->Align, Sec.Addr); + + // sh_offset is not significant for SHT_NOBITS sections, but the congruence + // rule must be followed if it is the first section in a PT_LOAD. Do not + // advance Off. + if (Sec.Type == SHT_NOBITS) { + Sec.Offset = Off; + continue; + } + + if (!FirstSec) { + // FirstSec being nullptr generally means that Sec does not have the + // SHF_ALLOC flag. + Off = Sec.Align ? alignTo(Off, Sec.Align) : Off; + } else if (FirstSec != &Sec) { + // The offset is relative to the first section in the PT_LOAD segment. Use + // sh_offset for non-SHF_ALLOC sections. + Off = Sec.OriginalOffset - FirstSec->OriginalOffset + FirstSec->Offset; + } + Sec.Offset = Off; + Off += Sec.Size; + } + return Off; +} + +// Rewrite p_offset and p_filesz of non-empty non-PT_PHDR segments after +// sh_offset values have been updated. +static uint64_t layoutSegmentsForOnlyKeepDebug(std::vector<Segment *> &Segments, + uint64_t HdrEnd) { + uint64_t MaxOffset = 0; + for (Segment *Seg : Segments) { + const SectionBase *Sec = Seg->firstSection(); + if (Seg->Type == PT_PHDR || !Sec) + continue; + + uint64_t Offset = Sec->Offset; + uint64_t FileSize = 0; + for (const SectionBase *Sec : Seg->Sections) { + uint64_t Size = Sec->Type == SHT_NOBITS ? 0 : Sec->Size; + if (Sec->Offset + Size > Offset) + FileSize = std::max(FileSize, Sec->Offset + Size - Offset); + } + + // If the segment includes EHDR and program headers, don't make it smaller + // than the headers. + if (Seg->Offset < HdrEnd && HdrEnd <= Seg->Offset + Seg->FileSize) { + FileSize += Offset - Seg->Offset; + Offset = Seg->Offset; + FileSize = std::max(FileSize, HdrEnd - Offset); + } + + Seg->Offset = Offset; + Seg->FileSize = FileSize; + MaxOffset = std::max(MaxOffset, Offset + FileSize); + } + return MaxOffset; +} + template <class ELFT> void ELFWriter<ELFT>::initEhdrSegment() { Segment &ElfHdr = Obj.ElfHdrSegment; ElfHdr.Type = PT_PHDR; @@ -1977,12 +2050,24 @@ OrderedSegments.push_back(&Obj.ElfHdrSegment); OrderedSegments.push_back(&Obj.ProgramHdrSegment); orderSegments(OrderedSegments); - // Offset is used as the start offset of the first segment to be laid out. - // Since the ELF Header (ElfHdrSegment) must be at the start of the file, - // we start at offset 0. - uint64_t Offset = 0; - Offset = layoutSegments(OrderedSegments, Offset); - Offset = layoutSections(Obj.sections(), Offset); + + uint64_t Offset; + if (OnlyKeepDebug) { + // For --only-keep-debug, the sections that did not preserve contents were + // changed to SHT_NOBITS. We now rewrite sh_offset fields of sections, and + // then rewrite p_offset/p_filesz of program headers. + uint64_t HdrEnd = + sizeof(Elf_Ehdr) + llvm::size(Obj.segments()) * sizeof(Elf_Phdr); + Offset = layoutSectionsForOnlyKeepDebug(Obj, HdrEnd); + Offset = std::max(Offset, + layoutSegmentsForOnlyKeepDebug(OrderedSegments, HdrEnd)); + } else { + // Offset is used as the start offset of the first segment to be laid out. + // Since the ELF Header (ElfHdrSegment) must be at the start of the file, + // we start at offset 0. + Offset = layoutSegments(OrderedSegments, 0); + Offset = layoutSections(Obj.sections(), Offset); + } // If we need to write the section header table out then we need to align the // Offset so that SHOffset is valid. if (WriteSectionHeaders)