Index: lld/ELF/AArch64ErrataFix.cpp =================================================================== --- lld/ELF/AArch64ErrataFix.cpp +++ lld/ELF/AArch64ErrataFix.cpp @@ -356,7 +356,7 @@ } uint64_t PatchOff = 0; - const uint8_t *Buf = IS->Data.begin(); + const uint8_t *Buf = IS->data().begin(); const ulittle32_t *InstBuf = reinterpret_cast(Buf + Off); uint32_t Instr1 = *InstBuf++; uint32_t Instr2 = *InstBuf++; @@ -411,7 +411,7 @@ void lld::elf::Patch843419Section::writeTo(uint8_t *Buf) { // Copy the instruction that we will be replacing with a branch in the // Patchee Section. - write32le(Buf, read32le(Patchee->Data.begin() + PatcheeOffset)); + write32le(Buf, read32le(Patchee->data().begin() + PatcheeOffset)); // Apply any relocation transferred from the original PatcheeSection. // For a SyntheticSection Buf already has OutSecOff added, but relocateAlloc @@ -598,7 +598,7 @@ auto DataSym = std::next(CodeSym); uint64_t Off = (*CodeSym)->Value; uint64_t Limit = - (DataSym == MapSyms.end()) ? IS->Data.size() : (*DataSym)->Value; + (DataSym == MapSyms.end()) ? 
IS->data().size() : (*DataSym)->Value; while (Off < Limit) { uint64_t StartAddr = IS->getVA(Off); Index: lld/ELF/DWARF.cpp =================================================================== --- lld/ELF/DWARF.cpp +++ lld/ELF/DWARF.cpp @@ -30,26 +30,27 @@ for (InputSectionBase *Sec : Obj->getSections()) { if (!Sec) continue; + if (LLDDWARFSection *M = StringSwitch(Sec->Name) .Case(".debug_info", &InfoSection) .Case(".debug_ranges", &RangeSection) .Case(".debug_line", &LineSection) .Default(nullptr)) { - Sec->maybeDecompress(); - M->Data = toStringRef(Sec->Data); + M->Data = toStringRef(Sec->data()); M->Sec = Sec; continue; } + if (Sec->Name == ".debug_abbrev") - AbbrevSection = toStringRef(Sec->Data); + AbbrevSection = toStringRef(Sec->data()); else if (Sec->Name == ".debug_gnu_pubnames") - GnuPubNamesSection = toStringRef(Sec->Data); + GnuPubNamesSection = toStringRef(Sec->data()); else if (Sec->Name == ".debug_gnu_pubtypes") - GnuPubTypesSection = toStringRef(Sec->Data); + GnuPubTypesSection = toStringRef(Sec->data()); else if (Sec->Name == ".debug_str") - StrSection = toStringRef(Sec->Data); - if (Sec->Name == ".debug_line_str") - LineStringSection = toStringRef(Sec->Data); + StrSection = toStringRef(Sec->data()); + else if (Sec->Name == ".debug_line_str") + LineStringSection = toStringRef(Sec->data()); } } Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -1600,14 +1600,13 @@ } // This adds a .comment section containing a version string. We have to add it - // before decompressAndMergeSections because the .comment section is a + // before uncompressAndMergeSections because the .comment section is a // mergeable section. if (!Config->Relocatable) InputSections.push_back(createCommentSection()); // Do size optimizations: garbage collection, merging of SHF_MERGE sections // and identical code folding. 
- decompressSections(); splitSections(); markLive(); demoteSharedSymbols(); Index: lld/ELF/EhFrame.cpp =================================================================== --- lld/ELF/EhFrame.cpp +++ lld/ELF/EhFrame.cpp @@ -44,7 +44,7 @@ private: template void failOn(const P *Loc, const Twine &Msg) { fatal("corrupted .eh_frame: " + Msg + "\n>>> defined in " + - IS->getObjMsg((const uint8_t *)Loc - IS->Data.data())); + IS->getObjMsg((const uint8_t *)Loc - IS->data().data())); } uint8_t readByte(); @@ -59,7 +59,7 @@ } size_t elf::readEhRecordSize(InputSectionBase *S, size_t Off) { - return EhReader(S, S->Data.slice(Off)).readEhRecordSize(); + return EhReader(S, S->data().slice(Off)).readEhRecordSize(); } // .eh_frame section is a sequence of records. Each record starts with Index: lld/ELF/ICF.cpp =================================================================== --- lld/ELF/ICF.cpp +++ lld/ELF/ICF.cpp @@ -301,7 +301,7 @@ template bool ICF::equalsConstant(const InputSection *A, const InputSection *B) { if (A->NumRelocations != B->NumRelocations || A->Flags != B->Flags || - A->getSize() != B->getSize() || A->Data != B->Data) + A->getSize() != B->getSize() || A->data() != B->data()) return false; // If two sections have different output sections, we cannot merge them. @@ -439,7 +439,7 @@ // Initially, we use hash values to partition sections. parallelForEach(Sections, [&](InputSection *S) { // Set MSB to 1 to avoid collisions with non-hash IDs. - S->Class[0] = xxHash64(S->Data) | (1U << 31); + S->Class[0] = xxHash64(S->data()) | (1U << 31); }); // From now on, sections in Sections vector are ordered so that sections Index: lld/ELF/InputFiles.cpp =================================================================== --- lld/ELF/InputFiles.cpp +++ lld/ELF/InputFiles.cpp @@ -605,7 +605,7 @@ // as a given section. 
static InputSection *toRegularSection(MergeInputSection *Sec) { return make(Sec->File, Sec->Flags, Sec->Type, Sec->Alignment, - Sec->Data, Sec->Name); + Sec->data(), Sec->Name); } template Index: lld/ELF/InputSection.h =================================================================== --- lld/ELF/InputSection.h +++ lld/ELF/InputSection.h @@ -115,7 +115,12 @@ return cast_or_null>(File); } - ArrayRef Data; + ArrayRef data() const { + if (UncompressedSize >= 0 && !UncompressedBuf) + uncompress(); + return RawData; + } + uint64_t getOffsetInFile() const; // True if this section has already been placed to a linker script @@ -169,11 +174,6 @@ template Defined *getEnclosingFunction(uint64_t Offset); - // Compilers emit zlib-compressed debug sections if the -gz option - // is given. This function checks if this section is compressed, and - // if so, decompress in memory. - void maybeDecompress(); - // Returns a source location string. Used to construct an error message. template std::string getLocation(uint64_t Offset); std::string getSrcMsg(const Symbol &Sym, uint64_t Offset); @@ -200,15 +200,21 @@ template llvm::ArrayRef getDataAs() const { - size_t S = Data.size(); + size_t S = data().size(); assert(S % sizeof(T) == 0); - return llvm::makeArrayRef((const T *)Data.data(), S / sizeof(T)); + return llvm::makeArrayRef((const T *)data().data(), S / sizeof(T)); } -private: - // A pointer that owns decompressed data if a section is compressed by zlib. +protected: + void parseCompressedHeader(); + void uncompress() const; + + mutable ArrayRef RawData; + + // A pointer that owns uncompressed data if a section is compressed by zlib. // Since the feature is not used often, this is usually a nullptr. - std::unique_ptr DecompressBuf; + mutable std::unique_ptr UncompressedBuf; + int64_t UncompressedSize = -1; }; // SectionPiece represents a piece of splittable section contents. 
@@ -255,8 +261,8 @@ llvm::CachedHashStringRef getData(size_t I) const { size_t Begin = Pieces[I].InputOff; size_t End = - (Pieces.size() - 1 == I) ? Data.size() : Pieces[I + 1].InputOff; - return {toStringRef(Data.slice(Begin, End - Begin)), Pieces[I].Hash}; + (Pieces.size() - 1 == I) ? data().size() : Pieces[I + 1].InputOff; + return {toStringRef(data().slice(Begin, End - Begin)), Pieces[I].Hash}; } // Returns the SectionPiece at a given input section offset. @@ -277,7 +283,9 @@ unsigned FirstRelocation) : InputOff(Off), Sec(Sec), Size(Size), FirstRelocation(FirstRelocation) {} - ArrayRef data() { return {Sec->Data.data() + this->InputOff, Size}; } + ArrayRef data() { + return {Sec->data().data() + this->InputOff, Size}; + } size_t InputOff; ssize_t OutputOff = -1; Index: lld/ELF/InputSection.cpp =================================================================== --- lld/ELF/InputSection.cpp +++ lld/ELF/InputSection.cpp @@ -21,7 +21,6 @@ #include "Thunks.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" -#include "llvm/Object/Decompressor.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" @@ -64,11 +63,11 @@ StringRef Name, Kind SectionKind) : SectionBase(SectionKind, Name, Flags, Entsize, Alignment, Type, Info, Link), - File(File), Data(Data) { + File(File), RawData(Data) { // In order to reduce memory allocation, we assume that mergeable // sections are smaller than 4 GiB, which is not an unreasonable // assumption as of 2017. 
- if (SectionKind == SectionBase::Merge && Data.size() > UINT32_MAX) + if (SectionKind == SectionBase::Merge && RawData.size() > UINT32_MAX) error(toString(this) + ": section too large"); NumRelocations = 0; @@ -80,6 +79,17 @@ if (!isPowerOf2_64(V)) fatal(toString(File) + ": section sh_addralign is not a power of 2"); this->Alignment = V; + + // In ELF, each section can be compressed by zlib, and if compressed, + // section name may be mangled by inserting "z" (e.g. ".zdebug_info"). + // If that's the case, demangle section name so that we can handle a + // section as if it weren't compressed. + if ((Flags & SHF_COMPRESSED) || Name.startswith(".zdebug")) { + if (!zlib::isAvailable()) + error(toString(File) + ": contains a compressed section, " + + "but zlib is not available"); + parseCompressedHeader(); + } } // Drop SHF_GROUP bit unless we are producing a re-linkable object file. @@ -128,13 +138,25 @@ size_t InputSectionBase::getSize() const { if (auto *S = dyn_cast(this)) return S->getSize(); + if (UncompressedSize >= 0) + return UncompressedSize; + return RawData.size(); +} + +void InputSectionBase::uncompress() const { + size_t Size = UncompressedSize; + UncompressedBuf.reset(new char[Size]); - return Data.size(); + if (Error E = + zlib::uncompress(toStringRef(RawData), UncompressedBuf.get(), Size)) + fatal(toString(this) + + ": uncompress failed: " + llvm::toString(std::move(E))); + RawData = makeArrayRef((uint8_t *)UncompressedBuf.get(), Size); } uint64_t InputSectionBase::getOffsetInFile() const { const uint8_t *FileStart = (const uint8_t *)File->MB.getBufferStart(); - const uint8_t *SecStart = Data.begin(); + const uint8_t *SecStart = data().begin(); return SecStart - FileStart; } @@ -180,34 +202,67 @@ return Sec ? Sec->getParent() : nullptr; } -// Decompress section contents if required. Note that this function -// is called from parallelForEach, so it must be thread-safe. 
-void InputSectionBase::maybeDecompress() { - if (DecompressBuf) - return; - if (!(Flags & SHF_COMPRESSED) && !Name.startswith(".zdebug")) - return; +// When a section is compressed, `RawData` consists of a header followed +// by zlib-compressed data. This function parses a header to initialize +// `UncompressedSize` member and remove the header from `RawData`. +void InputSectionBase::parseCompressedHeader() { + // Old-style header + if (Name.startswith(".zdebug")) { + if (!toStringRef(RawData).startswith("ZLIB")) { + error(toString(this) + ": corrupted compressed section header"); + return; + } + RawData = RawData.slice(4); - // Decompress a section. - Decompressor Dec = check(Decompressor::create(Name, toStringRef(Data), - Config->IsLE, Config->Is64)); + if (RawData.size() < 8) { + error(toString(this) + ": corrupted compressed section header"); + return; + } - size_t Size = Dec.getDecompressedSize(); - DecompressBuf.reset(new char[Size + Name.size()]()); - if (Error E = Dec.decompress({DecompressBuf.get(), Size})) - fatal(toString(this) + - ": decompress failed: " + llvm::toString(std::move(E))); + UncompressedSize = read64be(RawData.data()); + RawData = RawData.slice(8); - Data = makeArrayRef((uint8_t *)DecompressBuf.get(), Size); + // Restore the original section name. + // (e.g. ".zdebug_info" -> ".debug_info") + Name = Saver.save("." + Name.substr(2)); + return; + } + + assert(Flags & SHF_COMPRESSED); Flags &= ~(uint64_t)SHF_COMPRESSED; - // A section name may have been altered if compressed. If that's - // the case, restore the original name. (i.e. 
".zdebug_" -> ".debug_") - if (Name.startswith(".zdebug")) { - DecompressBuf[Size] = '.'; - memcpy(&DecompressBuf[Size + 1], Name.data() + 2, Name.size() - 2); - Name = StringRef(&DecompressBuf[Size], Name.size() - 1); + // New-style 64-bit header + if (Config->Is64) { + if (RawData.size() < sizeof(Elf64_Chdr)) { + error(toString(this) + ": corrupted compressed section"); + return; + } + + auto *Hdr = reinterpret_cast(RawData.data()); + if (read32(&Hdr->ch_type) != ELFCOMPRESS_ZLIB) { + error(toString(this) + ": unsupported compression type"); + return; + } + + UncompressedSize = read64(&Hdr->ch_size); + RawData = RawData.slice(sizeof(*Hdr)); + return; + } + + // New-style 32-bit header + if (RawData.size() < sizeof(Elf32_Chdr)) { + error(toString(this) + ": corrupted compressed section"); + return; } + + auto *Hdr = reinterpret_cast(RawData.data()); + if (read32(&Hdr->ch_type) != ELFCOMPRESS_ZLIB) { + error(toString(this) + ": unsupported compression type"); + return; + } + + UncompressedSize = read32(&Hdr->ch_size); + RawData = RawData.slice(sizeof(*Hdr)); } InputSection *InputSectionBase::getLinkOrderDep() const { @@ -381,7 +436,7 @@ } int64_t Addend = getAddend(Rel); - const uint8_t *BufLoc = Sec->Data.begin() + Rel.r_offset; + const uint8_t *BufLoc = Sec->data().begin() + Rel.r_offset; if (!RelTy::IsRela) Addend = Target->getImplicitAddend(BufLoc, Type); @@ -988,10 +1043,23 @@ return; } + // If this is a compressed section, uncompress section contents directly + // to the buffer. + if (UncompressedSize >= 0 && !UncompressedBuf) { + size_t Size = UncompressedSize; + if (Error E = zlib::uncompress(toStringRef(RawData), + (char *)(Buf + OutSecOff), Size)) + fatal(toString(this) + + ": uncompress failed: " + llvm::toString(std::move(E))); + uint8_t *BufEnd = Buf + OutSecOff + Size; + relocate(Buf, BufEnd); + return; + } + // Copy section contents from source object file to output file // and then apply relocations. 
- memcpy(Buf + OutSecOff, Data.data(), Data.size()); - uint8_t *BufEnd = Buf + OutSecOff + Data.size(); + memcpy(Buf + OutSecOff, data().data(), data().size()); + uint8_t *BufEnd = Buf + OutSecOff + data().size(); relocate(Buf, BufEnd); } @@ -1042,7 +1110,7 @@ template void EhInputSection::split(ArrayRef Rels) { unsigned RelI = 0; - for (size_t Off = 0, End = Data.size(); Off != End;) { + for (size_t Off = 0, End = data().size(); Off != End;) { size_t Size = readEhRecordSize(this, Off); Pieces.emplace_back(Off, this, Size, getReloc(Off, Size, Rels, RelI)); // The empty record is the end marker. @@ -1122,9 +1190,9 @@ assert(Pieces.empty()); if (Flags & SHF_STRINGS) - splitStrings(Data, Entsize); + splitStrings(data(), Entsize); else - splitNonStrings(Data, Entsize); + splitNonStrings(data(), Entsize); OffsetMap.reserve(Pieces.size()); for (size_t I = 0, E = Pieces.size(); I != E; ++I) @@ -1145,7 +1213,7 @@ } SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) { - if (this->Data.size() <= Offset) + if (this->data().size() <= Offset) fatal(toString(this) + ": offset is outside the section"); // Find a piece starting at a given offset. 
Index: lld/ELF/MarkLive.cpp =================================================================== --- lld/ELF/MarkLive.cpp +++ lld/ELF/MarkLive.cpp @@ -45,7 +45,7 @@ template static typename ELFT::uint getAddend(InputSectionBase &Sec, const typename ELFT::Rel &Rel) { - return Target->getImplicitAddend(Sec.Data.begin() + Rel.r_offset, + return Target->getImplicitAddend(Sec.data().begin() + Rel.r_offset, Rel.getType(Config->IsMips64EL)); } Index: lld/ELF/Relocations.cpp =================================================================== --- lld/ELF/Relocations.cpp +++ lld/ELF/Relocations.cpp @@ -582,7 +582,7 @@ if (PairTy == R_MIPS_NONE) return 0; - const uint8_t *Buf = Sec.Data.data(); + const uint8_t *Buf = Sec.data().data(); uint32_t SymIndex = Rel.getSymbol(Config->IsMips64EL); // To make things worse, paired relocations might not be contiguous in @@ -610,7 +610,7 @@ if (RelTy::IsRela) { Addend = getAddend(Rel); } else { - const uint8_t *Buf = Sec.Data.data(); + const uint8_t *Buf = Sec.data().data(); Addend = Target->getImplicitAddend(Buf + Rel.r_offset, Type); } @@ -962,7 +962,7 @@ if (maybeReportUndefined(Sym, Sec, Rel.r_offset)) return; - const uint8_t *RelocatedAddr = Sec.Data.begin() + Rel.r_offset; + const uint8_t *RelocatedAddr = Sec.data().begin() + Rel.r_offset; RelExpr Expr = Target->getRelExpr(Type, Sym, RelocatedAddr); // Ignore "hint" relocations because they are only markers for relaxation. 
Index: lld/ELF/SyntheticSections.h =================================================================== --- lld/ELF/SyntheticSections.h +++ lld/ELF/SyntheticSections.h @@ -970,7 +970,6 @@ InputSection *createInterpSection(); MergeInputSection *createCommentSection(); -void decompressSections(); template void splitSections(); void mergeSections(); Index: lld/ELF/SyntheticSections.cpp =================================================================== --- lld/ELF/SyntheticSections.cpp +++ lld/ELF/SyntheticSections.cpp @@ -33,8 +33,8 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" -#include "llvm/Object/Decompressor.h" #include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" @@ -105,7 +105,7 @@ Create = true; std::string Filename = toString(Sec->File); - const size_t Size = Sec->Data.size(); + const size_t Size = Sec->data().size(); // Older version of BFD (such as the default FreeBSD linker) concatenate // .MIPS.abiflags instead of merging. 
To allow for this case (or potential // zero padding) we ignore everything after the first Elf_Mips_ABIFlags @@ -114,7 +114,7 @@ Twine(Size) + " instead of " + Twine(sizeof(Elf_Mips_ABIFlags))); return nullptr; } - auto *S = reinterpret_cast(Sec->Data.data()); + auto *S = reinterpret_cast(Sec->data().data()); if (S->version != 0) { error(Filename + ": unexpected .MIPS.abiflags version " + Twine(S->version)); @@ -177,7 +177,7 @@ Sec->Live = false; std::string Filename = toString(Sec->File); - ArrayRef D = Sec->Data; + ArrayRef D = Sec->data(); while (!D.empty()) { if (D.size() < sizeof(Elf_Mips_Options)) { @@ -233,12 +233,12 @@ for (InputSectionBase *Sec : Sections) { Sec->Live = false; - if (Sec->Data.size() != sizeof(Elf_Mips_RegInfo)) { + if (Sec->data().size() != sizeof(Elf_Mips_RegInfo)) { error(toString(Sec->File) + ": invalid size of .reginfo section"); return nullptr; } - auto *R = reinterpret_cast(Sec->Data.data()); + auto *R = reinterpret_cast(Sec->data().data()); Reginfo.ri_gprmask |= R->ri_gprmask; Sec->getFile()->MipsGp0 = R->ri_gp_value; }; @@ -2898,12 +2898,6 @@ return make(Name, Type, Flags, Alignment); } -// Debug sections may be compressed by zlib. Decompress if exists. -void elf::decompressSections() { - parallelForEach(InputSections, - [](InputSectionBase *Sec) { Sec->maybeDecompress(); }); -} - template void elf::splitSections() { // splitIntoPieces needs to be called on each MergeInputSection // before calling finalizeContents(). Index: lld/test/ELF/compressed-debug-input-err.s =================================================================== --- lld/test/ELF/compressed-debug-input-err.s +++ lld/test/ELF/compressed-debug-input-err.s @@ -3,8 +3,8 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o # RUN: not ld.lld %t.o -o %t.so -shared 2>&1 | FileCheck %s -## Check we are able to report zlib decompressor errors. 
-# CHECK: error: {{.*}}.o:(.zdebug_str): decompress failed: zlib error: Z_DATA_ERROR +## Check we are able to report zlib uncompress errors. +# CHECK: error: {{.*}}.o:(.debug_str): uncompress failed: zlib error: Z_DATA_ERROR .section .zdebug_str,"MS",@progbits,1 .ascii "ZLIB" Index: lld/test/ELF/relocatable-compressed-input.s =================================================================== --- lld/test/ELF/relocatable-compressed-input.s +++ lld/test/ELF/relocatable-compressed-input.s @@ -7,7 +7,7 @@ # RUN: ld.lld %t1 -o %t2 -r # RUN: llvm-readobj -sections -section-data %t2 | FileCheck %s -## Check we decompress section and remove ".z" prefix specific for zlib-gnu compression. +## Check we uncompress section and remove ".z" prefix specific for zlib-gnu compression. # CHECK: Section { # CHECK: Index: # CHECK: Name: .debug_str Index: lld/test/ELF/strip-debug.s =================================================================== --- lld/test/ELF/strip-debug.s +++ lld/test/ELF/strip-debug.s @@ -12,3 +12,5 @@ .section .debug_Foo,"",@progbits .section .zdebug_Bar,"",@progbits +.ascii "ZLIB" +.quad 0