Index: include/llvm/Object/Compressor.h
===================================================================
--- /dev/null
+++ include/llvm/Object/Compressor.h
@@ -0,0 +1,67 @@
+//===-- Compressor.h --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_COMPRESSOR_H
+#define LLVM_OBJECT_COMPRESSOR_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/EndianStream.h"
+
+namespace llvm {
+namespace object {
+
+/// Compressor helps to handle zlib compression of debug sections.
+class Compressor {
+public:
+  /// Create compressor object.
+  /// @param Data Uncompressed section content.
+  /// @return The compressor, or an error if zlib is not available.
+  static Expected<Compressor> create(StringRef Data);
+
+  /// Map a ".debug*" or ".zdebug*" section name to its GNU-style (".zdebug*")
+  /// or plain (".debug*") spelling.
+  /// @param Name Section name to translate.
+  /// @param IsGnuStyle Selects the ".zdebug*" spelling when true.
+  /// @return The requested spelling, or "" if the name is not recognized.
+  static StringRef getDebugSectionName(const StringRef Name, bool IsGnuStyle);
+
+  /// Write the compression header that precedes the compressed data.
+  /// @param W Destination stream.
+  /// @param DecompressedSize Size of the section data before compression.
+  /// @param Alignment Alignment recorded in the header (ELF style only).
+  /// @param Is64Bit Selects the Elf64_Chdr layout (ELF style only).
+  /// @param IsGnuStyle Selects the GNU "ZLIB" header over the ELF Chdr.
+  void writeHeader(support::endian::Writer &W, uint64_t DecompressedSize,
+                   unsigned Alignment, bool Is64Bit, bool IsGnuStyle);
+
+  /// Compress the section data and append it to the stream provided.
+  /// Nothing is written if compression fails.
+  /// @param W Destination stream.
+  /// @return The compression error, if any.
+  Error writeCompressedSectionData(support::endian::Writer &W) {
+    SmallVector<char, 128> CompressedBuffer;
+    if (Error E = zlib::compress(SectionData, CompressedBuffer))
+      return E;
+    W.OS << StringRef(CompressedBuffer.data(), CompressedBuffer.size());
+    return Error::success();
+  }
+
+private:
+  Compressor() {}
+  Compressor(StringRef Data) : SectionData(Data) {}
+  StringRef SectionData;
+};
+
+} // end namespace object
+} // end namespace llvm
+
+#endif // LLVM_OBJECT_COMPRESSOR_H
Index: lib/Object/CMakeLists.txt
===================================================================
--- lib/Object/CMakeLists.txt
+++ lib/Object/CMakeLists.txt
@@ -21,6 +21,7 @@
   SymbolSize.cpp
   WasmObjectFile.cpp
   WindowsResource.cpp
+  Compressor.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/Object
Index: lib/Object/Compressor.cpp
===================================================================
--- /dev/null
+++ lib/Object/Compressor.cpp
@@ -0,0 +1,110 @@
+//===-- Compressor.cpp ----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Compressor.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+
+using namespace llvm;
+using namespace llvm::support::endian;
+using namespace object;
+
+Expected<Compressor> Compressor::create(StringRef Data) {
+  if (!zlib::isAvailable())
+    return createError("zlib is not available");
+
+  Compressor D(Data);
+  return D;
+}
+
+StringRef Compressor::getDebugSectionName(const StringRef Name,
+                                          bool IsGnuStyle) {
+  // Only ".debug*" and ".zdebug*" names are translated; "" means unknown.
+  if (!Name.startswith(".debug") && !Name.startswith(".zdebug"))
+    return "";
+
+  StringRef LookupName = Name.substr(Name.startswith(".debug") ? 1 : 2);
+  auto [DebugName, ZDebugName] =
+      StringSwitch<std::pair<StringRef, StringRef>>(LookupName)
+          .Case("debug_str", {".debug_str", ".zdebug_str"})
+          .Case("debug_info", {".debug_info", ".zdebug_info"})
+          .Case("debug_line", {".debug_line", ".zdebug_line"})
+          .Case("debug_abbrev", {".debug_abbrev", ".zdebug_abbrev"})
+          .Case("debug_loc", {".debug_loc", ".zdebug_loc"})
+          .Case("debug_str_offsets",
+                {".debug_str_offsets", ".zdebug_str_offsets"})
+          .Case("debug_ranges", {".debug_ranges", ".zdebug_ranges"})
+          .Case("debug_rnglists", {".debug_rnglists", ".zdebug_rnglists"})
+          .Case("debug_info.dwo", {".debug_info.dwo", ".zdebug_info.dwo"})
+          .Case("debug_loc.dwo", {".debug_loc.dwo", ".zdebug_loc.dwo"})
+          .Case("debug_line.dwo", {".debug_line.dwo", ".zdebug_line.dwo"})
+          .Case("debug_names", {".debug_names", ".zdebug_names"})
+          .Case("debug_rnglists.dwo",
+                {".debug_rnglists.dwo", ".zdebug_rnglists.dwo"})
+          .Case("debug_str_offsets.dwo",
+                {".debug_str_offsets.dwo", ".zdebug_str_offsets.dwo"})
+          .Case("debug_addr", {".debug_addr", ".zdebug_addr"})
+          .Case("debug_aranges", {".debug_aranges", ".zdebug_aranges"})
+          .Case("debug_frame", {".debug_frame", ".zdebug_frame"})
+          .Case("debug_macinfo", {".debug_macinfo", ".zdebug_macinfo"})
+          .Case("debug_pubnames", {".debug_pubnames", ".zdebug_pubnames"})
+          .Case("debug_pubtypes", {".debug_pubtypes", ".zdebug_pubtypes"})
+          .Case("debug_gnu_pubnames",
+                {".debug_gnu_pubnames", ".zdebug_gnu_pubnames"})
+          .Case("debug_gnu_pubtypes",
+                {".debug_gnu_pubtypes", ".zdebug_gnu_pubtypes"})
+          .Case("debug_abbrev.dwo", {".debug_abbrev.dwo", ".zdebug_abbrev.dwo"})
+          .Case("debug_str.dwo", {".debug_str.dwo", ".zdebug_str.dwo"})
+          .Case("debug_cu_index", {".debug_cu_index", ".zdebug_cu_index"})
+          .Case("debug_tu_index", {".debug_tu_index", ".zdebug_tu_index"})
+          .Case("debug_line_str", {".debug_line_str", ".zdebug_line_str"})
+          .Case("debug_loclists", {".debug_loclists", ".zdebug_loclists"})
+          .Case("debug_types", {".debug_types", ".zdebug_types"})
+          // Any more debug info sections go here.
+          .Default({"", ""});
+
+  return IsGnuStyle ? ZDebugName : DebugName;
+}
+
+static void produceCompressedGnuHeader(support::endian::Writer &W,
+                                       uint64_t DecompressedSize) {
+  // GNU style: the "ZLIB" magic followed by the big-endian 64-bit size.
+  W.OS << "ZLIB";
+  support::endian::write<uint64_t>(W.OS, DecompressedSize, support::big);
+}
+
+static void produceCompressedZLibHeader(support::endian::Writer &W,
+                                        uint64_t DecompressedSize,
+                                        unsigned Alignment, bool Is64Bit) {
+  if (Is64Bit) {
+    // Write Elf64_Chdr header.
+    W.write(static_cast<ELF::Elf64_Word>(ELF::ELFCOMPRESS_ZLIB));
+    W.write(static_cast<ELF::Elf64_Word>(0)); // ch_reserved field.
+    W.write(static_cast<ELF::Elf64_Xword>(DecompressedSize));
+    W.write(static_cast<ELF::Elf64_Xword>(Alignment));
+  } else {
+    // Write Elf32_Chdr header otherwise.
+    W.write(static_cast<ELF::Elf32_Word>(ELF::ELFCOMPRESS_ZLIB));
+    W.write(static_cast<ELF::Elf32_Word>(DecompressedSize));
+    W.write(static_cast<ELF::Elf32_Word>(Alignment));
+  }
+}
+
+void Compressor::writeHeader(support::endian::Writer &W,
+                             uint64_t DecompressedSize, unsigned Alignment,
+                             bool Is64Bit, bool IsGnuStyle) {
+  return IsGnuStyle ? produceCompressedGnuHeader(W, DecompressedSize)
+                    : produceCompressedZLibHeader(W, DecompressedSize,
+                                                  Alignment, Is64Bit);
+}
Index: tools/llvm-objcopy/ObjcopyOpts.td
===================================================================
--- tools/llvm-objcopy/ObjcopyOpts.td
+++ tools/llvm-objcopy/ObjcopyOpts.td
@@ -16,6 +16,11 @@
 defm output_target : Eq<"output-target">,
                      HelpText<"Format of the output file">,
                      Values<"binary">;
+defm compress_debug_sections : Eq<"compress-debug-sections">,
+                               MetaVarName<"[ none | zlib | zlib-gnu (deprecated) ]">,
+                               HelpText<"Enable zlib-gnu or zlib Compression of DWARF debug sections.">;
+def decompress_debug_sections : Flag<["-", "--"], "decompress-debug-sections">,
+                                HelpText<"Decompress DWARF debug sections.">;
 def O : JoinedOrSeparate<["-"], "O">,
         Alias<output_target>;
 defm split_dwo : Eq<"split-dwo">,
Index: tools/llvm-objcopy/Object.h
===================================================================
--- tools/llvm-objcopy/Object.h
+++ tools/llvm-objcopy/Object.h
@@ -14,7 +14,10 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCTargetOptions.h"
 #include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Object/Compressor.h"
+#include "llvm/Object/Decompressor.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/JamCRC.h"
@@ -43,6 +46,44 @@
 class Object;
 struct Symbol;
 
+struct CopyConfig {
+  StringRef OutputFilename;
+  StringRef InputFilename;
+  StringRef OutputFormat;
+  StringRef InputFormat;
+  StringRef BinaryArch;
+
+  StringRef SplitDWO;
+  StringRef AddGnuDebugLink;
+  std::vector<StringRef> ToRemove;
+  std::vector<StringRef> Keep;
+  std::vector<StringRef> OnlyKeep;
+  std::vector<StringRef> AddSection;
+  std::vector<StringRef> SymbolsToLocalize;
+  std::vector<StringRef> SymbolsToGlobalize;
+  std::vector<StringRef> SymbolsToWeaken;
+  std::vector<StringRef> SymbolsToRemove;
+  std::vector<StringRef> SymbolsToKeep;
+  StringMap<StringRef> SectionsToRename;
+  StringMap<StringRef> SymbolsToRename;
+  bool StripAll = false;
+  bool StripAllGNU = false;
+  bool StripDebug = false;
+  bool StripSections = false;
+  bool StripNonAlloc = false;
+  bool StripDWO = false;
+  bool StripUnneeded = false;
+  bool ExtractDWO = false;
+  bool LocalizeHidden = false;
+  bool Weaken = false;
+  bool DiscardAll = false;
+  bool OnlyKeepDebug = false;
+  bool KeepFileSymbols = false;
+  bool DecompressDebugSections = false;
+
+  DebugCompressionType CompressDebugSections = DebugCompressionType::None;
+};
+
 class SectionTableRef {
   MutableArrayRef<std::unique_ptr<SectionBase>> Sections;
 
@@ -190,6 +231,9 @@
 
 template <class ELFT> class ELFWriter : public Writer {
 private:
+  DebugCompressionType CompressDebugSections = DebugCompressionType::None;
+  bool DecompressDebugSections = false;
+
   using Elf_Shdr = typename ELFT::Shdr;
   using Elf_Phdr = typename ELFT::Phdr;
   using Elf_Ehdr = typename ELFT::Ehdr;
@@ -208,14 +252,37 @@
 
   size_t totalSize() const;
 
+  constexpr bool isBigEndian() const {
+    static_assert((std::is_same<ELFT, object::ELF32LE>::value ||
+                   std::is_same<ELFT, object::ELF64LE>::value ||
+                   std::is_same<ELFT, object::ELF32BE>::value ||
+                   std::is_same<ELFT, object::ELF64BE>::value),
+                  "Invalid ELFT.");
+    return std::is_same<ELFT, object::ELF32BE>::value ||
+           std::is_same<ELFT, object::ELF64BE>::value;
+  }
+
+  constexpr bool is64Bit() const {
+    static_assert((std::is_same<ELFT, object::ELF32LE>::value ||
+                   std::is_same<ELFT, object::ELF64LE>::value ||
+                   std::is_same<ELFT, object::ELF32BE>::value ||
+                   std::is_same<ELFT, object::ELF64BE>::value),
+                  "Invalid ELFT.");
+    return std::is_same<ELFT, object::ELF64LE>::value ||
+           std::is_same<ELFT, object::ELF64BE>::value;
+  }
+
 public:
   virtual ~ELFWriter() {}
   bool WriteSectionHeaders = true;
 
   void finalize() override;
   void write() override;
-  ELFWriter(Object &Obj, Buffer &Buf, bool WSH)
-      : Writer(Obj, Buf), WriteSectionHeaders(WSH) {}
+  ELFWriter(Object &Obj, Buffer &Buf, const CopyConfig &Config)
+      : Writer(Obj, Buf),
+        CompressDebugSections(Config.CompressDebugSections),
+        DecompressDebugSections(Config.DecompressDebugSections),
+        WriteSectionHeaders(!Config.StripSections) {}
 };
 
 class BinaryWriter : public Writer {
@@ -259,6 +326,14 @@
   virtual void removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
   virtual void accept(SectionVisitor &Visitor) const = 0;
   virtual void markSymbols();
+  virtual bool isCompressable() const { return false; }
+  virtual bool isCompressed() const { return false; }
+  virtual void compress(bool IsGnuStyle, bool Is64Bit, bool IsBigEndian) {
+    assert(false && "Section is not compressable.");
+  }
+  virtual void decompress(bool Is64Bit, bool IsBigEndian) {
+    assert(false && "Section is not decompressable.");
+  }
 };
 
 class Segment {
@@ -308,6 +383,8 @@
   MAKE_SEC_WRITER_FRIEND
 
   ArrayRef<uint8_t> Contents;
+  SmallVector<char, 128> ModifiedContents;
+  bool doCompressionFinalize = false;
   SectionBase *LinkSection = nullptr;
 
 public:
@@ -317,6 +394,99 @@
   void removeSectionReferences(const SectionBase *Sec) override;
   void initialize(SectionTableRef SecTable) override;
   void finalize() override;
+  /// Returns true for a not yet compressed ".debug*" section whose name is
+  /// known to object::Compressor, i.e. one that compress() may be applied to.
+  bool isCompressable() const override {
+    // Sections already carrying SHF_COMPRESSED must not be compressed twice.
+    return Name.startswith(".debug") && !(Flags & ELF::SHF_COMPRESSED) &&
+           !object::Compressor::getDebugSectionName(Name, true).empty();
+  }
+
+  /// Returns true if this debug section holds compressed data: it has a
+  /// GNU-style ".zdebug*" name, the SHF_COMPRESSED flag, or starts with the
+  /// "ZLIB" magic.
+  bool isCompressed() const override {
+    if (!Name.startswith(".debug") && !Name.startswith(".zdebug"))
+      return false;
+
+    // View the whole section so that the magic check cannot read past the end
+    // of a section shorter than four bytes.
+    StringRef Data(reinterpret_cast<const char *>(Contents.data()),
+                   Contents.size());
+    return Name.startswith(".zdebug") || (Flags & ELF::SHF_COMPRESSED) ||
+           Data.startswith("ZLIB");
+  }
+
+  /// Compresses the contents into ModifiedContents, preceded by the GNU or ELF
+  /// compression header. finalize() adopts the result only when it is smaller
+  /// than the original; if zlib is unavailable or fails, the section is left
+  /// as it is.
+  void compress(bool IsGnuStyle, bool Is64Bit, bool IsBigEndian) override {
+    StringRef UncompressedBuffer(
+        reinterpret_cast<const char *>(Contents.data()), Contents.size());
+
+    Expected<object::Compressor> Compressor =
+        object::Compressor::create(UncompressedBuffer);
+    if (!Compressor) {
+      // An Expected in the error state must not be dereferenced.
+      consumeError(Compressor.takeError());
+      return;
+    }
+
+    raw_svector_ostream OS(ModifiedContents);
+    support::endian::Writer W(OS, IsBigEndian ? support::big : support::little);
+    Compressor->writeHeader(W, Contents.size(), Align, Is64Bit, IsGnuStyle);
+    if (Error E = Compressor->writeCompressedSectionData(W)) {
+      // Do not keep a header that is not followed by a complete payload.
+      consumeError(std::move(E));
+      ModifiedContents.clear();
+      return;
+    }
+
+    doCompressionFinalize = (ModifiedContents.size() < Size);
+    if (doCompressionFinalize && IsGnuStyle)
+      Name = object::Compressor::getDebugSectionName(Name, IsGnuStyle);
+  }
+
+  /// Replaces the contents with their decompressed form, clears
+  /// SHF_COMPRESSED and restores the plain ".debug*" name. Sections that are
+  /// not compressed, or that fail to decompress, are left as they are.
+  void decompress(bool Is64Bit, bool IsBigEndian) override {
+    if (!isCompressed())
+      return;
+
+    Expected<object::Decompressor> Decompressor = object::Decompressor::create(
+        Name,
+        StringRef(reinterpret_cast<const char *>(Contents.data()),
+                  Contents.size()),
+        !IsBigEndian, Is64Bit);
+    if (!Decompressor) {
+      // An Expected in the error state must not be dereferenced.
+      consumeError(Decompressor.takeError());
+      return;
+    }
+
+    if (Error E = Decompressor->resizeAndDecompress(ModifiedContents)) {
+      consumeError(std::move(E));
+      return;
+    }
+
+    Contents = ArrayRef<uint8_t>(
+        reinterpret_cast<const uint8_t *>(ModifiedContents.data()),
+        ModifiedContents.size());
+    Size = ModifiedContents.size();
+    // The data is raw again, so the section must not stay flagged compressed.
+    Flags &= ~static_cast<uint64_t>(ELF::SHF_COMPRESSED);
+
+    if (Name.startswith(".zdebug")) {
+      // Keep the current name if the lookup does not know this section,
+      // instead of renaming it to the empty string.
+      StringRef PlainName =
+          object::Compressor::getDebugSectionName(Name, false);
+      if (!PlainName.empty())
+        Name = PlainName;
+    }
+  }
 };
 
 class OwnedDataSection : public SectionBase {
Index: tools/llvm-objcopy/Object.cpp
===================================================================
--- tools/llvm-objcopy/Object.cpp
+++ tools/llvm-objcopy/Object.cpp
@@ -489,7 +489,20 @@
   }
 }
 
-void Section::finalize() { this->Link = LinkSection ? LinkSection->Index : 0; }
+void Section::finalize() {
+  this->Link = LinkSection ? LinkSection->Index : 0;
+
+  if (!doCompressionFinalize)
+    return;
+
+  Contents = ArrayRef<uint8_t>((const uint8_t *)ModifiedContents.data(),
+                               ModifiedContents.size());
+  Size = ModifiedContents.size();
+
+  const bool IsGnuStyle = Name.startswith(".zdebug");
+  if (!IsGnuStyle)
+    Flags |= ELF::SHF_COMPRESSED;
+}
 
 void GnuDebugLinkSection::init(StringRef File, StringRef Data) {
   FileName = sys::path::filename(File);
@@ -1256,10 +1269,21 @@
     }
   }
 
+  bool IsGnuStyle = (CompressDebugSections == DebugCompressionType::GNU);
+  bool doCompress = (CompressDebugSections != DebugCompressionType::None) &&
+                    !DecompressDebugSections;
+
   // Make sure we add the names of all the sections. Importantly this must be
   // done after we decide to add or remove SectionIndexes.
   if (Obj.SectionNames != nullptr)
-    for (const auto &Section : Obj.sections()) {
+    for (auto &Section : Obj.sections()) {
+
+      if (doCompress && Section.isCompressable()) {
+        Section.compress(IsGnuStyle, is64Bit(), isBigEndian());
+      } else if (DecompressDebugSections && Section.isCompressed()) {
+        Section.decompress(is64Bit(), isBigEndian());
+      }
+
       Obj.SectionNames->addString(Section.Name);
     }
 
Index: tools/llvm-objcopy/llvm-objcopy.cpp
===================================================================
--- tools/llvm-objcopy/llvm-objcopy.cpp
+++ tools/llvm-objcopy/llvm-objcopy.cpp
@@ -115,41 +115,6 @@
   StripOptTable() : OptTable(StripInfoTable, true) {}
 };
 
-struct CopyConfig {
-  StringRef OutputFilename;
-  StringRef InputFilename;
-  StringRef OutputFormat;
-  StringRef InputFormat;
-  StringRef BinaryArch;
-
-  StringRef SplitDWO;
-  StringRef AddGnuDebugLink;
-  std::vector<StringRef> ToRemove;
-  std::vector<StringRef> Keep;
-  std::vector<StringRef> OnlyKeep;
-  std::vector<StringRef> AddSection;
-  std::vector<StringRef> SymbolsToLocalize;
-  std::vector<StringRef> SymbolsToGlobalize;
-  std::vector<StringRef> SymbolsToWeaken;
-  std::vector<StringRef> SymbolsToRemove;
-  std::vector<StringRef> SymbolsToKeep;
-  StringMap<StringRef> SectionsToRename;
-  StringMap<StringRef> SymbolsToRename;
-  bool StripAll = false;
-  bool StripAllGNU = false;
-  bool StripDebug = false;
-  bool StripSections = false;
-  bool StripNonAlloc = false;
-  bool StripDWO = false;
-  bool StripUnneeded = false;
-  bool ExtractDWO = false;
-  bool LocalizeHidden = false;
-  bool Weaken = false;
-  bool DiscardAll = false;
-  bool OnlyKeepDebug = false;
-  bool KeepFileSymbols = false;
-};
-
 using SectionPred = std::function<bool(const SectionBase &Sec)>;
 
 } // namespace
@@ -207,17 +172,13 @@
   // Depending on the initial ELFT and OutputFormat we need a different Writer.
   switch (OutputElfType) {
   case ELFT_ELF32LE:
-    return llvm::make_unique<ELFWriter<ELF32LE>>(Obj, Buf,
-                                                 !Config.StripSections);
+    return llvm::make_unique<ELFWriter<ELF32LE>>(Obj, Buf, Config);
   case ELFT_ELF64LE:
-    return llvm::make_unique<ELFWriter<ELF64LE>>(Obj, Buf,
-                                                 !Config.StripSections);
+    return llvm::make_unique<ELFWriter<ELF64LE>>(Obj, Buf, Config);
   case ELFT_ELF32BE:
-    return llvm::make_unique<ELFWriter<ELF32BE>>(Obj, Buf,
-                                                 !Config.StripSections);
+    return llvm::make_unique<ELFWriter<ELF32BE>>(Obj, Buf, Config);
   case ELFT_ELF64BE:
-    return llvm::make_unique<ELFWriter<ELF64BE>>(Obj, Buf,
-                                                 !Config.StripSections);
+    return llvm::make_unique<ELFWriter<ELF64BE>>(Obj, Buf, Config);
   }
   llvm_unreachable("Invalid output format");
 }
@@ -585,6 +546,16 @@
   Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target);
   Config.BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture);
 
+  StringRef CompressDebugSections =
+      InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections);
+  if (CompressDebugSections == "zlib-gnu")
+    Config.CompressDebugSections = DebugCompressionType::GNU;
+  else if (CompressDebugSections == "zlib")
+    Config.CompressDebugSections = DebugCompressionType::Z;
+  else if (!CompressDebugSections.empty() && CompressDebugSections != "none")
+    error("Invalid or unsupported --compress-debug-sections format: " +
+          CompressDebugSections);
+
   Config.SplitDWO = InputArgs.getLastArgValue(OBJCOPY_split_dwo);
   Config.AddGnuDebugLink =
       InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink);
@@ -625,6 +596,8 @@
   Config.DiscardAll = InputArgs.hasArg(OBJCOPY_discard_all);
   Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug);
   Config.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols);
+  Config.DecompressDebugSections =
+      InputArgs.hasArg(OBJCOPY_decompress_debug_sections);
   for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol))
     Config.SymbolsToLocalize.push_back(Arg->getValue());
   for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol))
@@ -636,6 +609,12 @@
   for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol))
     Config.SymbolsToKeep.push_back(Arg->getValue());
 
+  if (Config.DecompressDebugSections &&
+      Config.CompressDebugSections != DebugCompressionType::None) {
+    error("Can not specify --compress-debug-sections as well as "
+          "--decompress-debug-sections at the same time.");
+  }
+
   return Config;
 }
 