diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -2,6 +2,10 @@
 tablegen(LLVM Options.inc -gen-opt-parser-defs)
 add_public_tablegen_target(ELFOptionsTableGen)
 
+if(LLVM_ENABLE_ZLIB)
+  set(imported_libs ZLIB::ZLIB)
+endif()
+
 add_lld_library(lldELF
   AArch64ErrataFix.cpp
   Arch/AArch64.cpp
@@ -58,6 +62,7 @@
 
   LINK_LIBS
     lldCommon
+    ${imported_libs}
     ${LLVM_PTHREAD_LIB}
 
   DEPENDS
diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h
--- a/lld/ELF/OutputSections.h
+++ b/lld/ELF/OutputSections.h
@@ -25,6 +25,12 @@
 class InputSection;
 class InputSectionBase;
 
+struct CompressedData {
+  std::unique_ptr<SmallVector<uint8_t, 0>[]> shards;
+  uint32_t numShards = 0;
+  uint32_t checksum = 0;
+};
+
 // This represents a section in an output file.
 // It is composed of multiple InputSections.
 // The writer creates multiple OutputSections and assign them unique,
@@ -113,7 +119,7 @@
 private:
   // Used for implementation of --compress-debug-sections option.
   SmallVector<uint8_t, 0> zDebugHeader;
-  SmallVector<char, 0> compressedData;
+  CompressedData compressed;
 
   std::array<uint8_t, 4> getFiller();
 };
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -15,7 +15,7 @@
 #include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
 #include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/Support/Compression.h"
+#include "llvm/Config/config.h" // LLVM_ENABLE_ZLIB
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Parallel.h"
@@ -23,6 +23,9 @@
 #include "llvm/Support/TimeProfiler.h"
 #include <regex>
 #include <unordered_set>
+#if LLVM_ENABLE_ZLIB
+#include <zlib.h>
+#endif
 
 using namespace llvm;
 using namespace llvm::dwarf;
@@ -284,13 +287,45 @@
   memcpy(buf + i, filler.data(), size - i);
 }
 
+#if LLVM_ENABLE_ZLIB
+static SmallVector<uint8_t, 0> deflateShard(ArrayRef<uint8_t> in, int level,
+                                            int flush) {
+  // 15 and 8 are default. windowBits=-15 is negative to generate raw deflate
+  // data with no zlib header or trailer.
+  z_stream s = {};
+  deflateInit2(&s, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
+  s.next_in = const_cast<uint8_t *>(in.data());
+  s.avail_in = in.size();
+
+  // Allocate a buffer of half of the input size, and grow it by 1.5x if
+  // insufficient.
+  SmallVector<uint8_t, 0> out;
+  size_t pos = 0;
+  out.resize_for_overwrite(std::max<size_t>(in.size() / 2, 64));
+  do {
+    if (pos == out.size())
+      out.resize_for_overwrite(out.size() * 3 / 2);
+    s.next_out = out.data() + pos;
+    s.avail_out = out.size() - pos;
+    (void)deflate(&s, flush);
+    pos = s.next_out - out.data();
+  } while (s.avail_out == 0);
+  assert(s.avail_in == 0);
+
+  out.truncate(pos);
+  deflateEnd(&s);
+  return out;
+}
+#endif
+
 // Compress section contents if this section contains debug info.
 template <class ELFT> void OutputSection::maybeCompress() {
+#if LLVM_ENABLE_ZLIB
   using Elf_Chdr = typename ELFT::Chdr;
 
   // Compress only DWARF debug sections.
   if (!config->compressDebugSections || (flags & SHF_ALLOC) ||
-      !name.startswith(".debug_"))
+      !name.startswith(".debug_") || size == 0)
     return;
 
   llvm::TimeTraceScope timeScope("Compress debug sections");
@@ -309,13 +344,42 @@
   // -O2 is given, we use level 6 to compress debug info more by ~15%. We found
   // that level 7 to 9 doesn't make much difference (~1% more compression) while
   // they take significant amount of time (~2x), so level 6 seems enough.
-  if (Error e = zlib::compress(toStringRef(buf), compressedData,
-                               config->optimize >= 2 ? 6 : 1))
-    fatal("compress failed: " + llvm::toString(std::move(e)));
+  const int level = config->optimize >= 2 ? 6 : Z_BEST_SPEED;
+
+  // Split input into 1-MiB shards.
+  constexpr size_t shardSize = 1 << 20;
+  const size_t numShards = (size + shardSize - 1) / shardSize;
+  auto shardsIn = std::make_unique<ArrayRef<uint8_t>[]>(numShards);
+  for (size_t i = 0, start = 0, end; start != buf.size(); ++i, start = end) {
+    end = std::min(start + shardSize, buf.size());
+    shardsIn[i] = makeArrayRef(buf.data() + start, end - start);
+  }
+
+  // Compress shards and compute Adler-32 checksums. Use Z_SYNC_FLUSH for all
+  // shards but the last to flush the output to a byte boundary to be
+  // concatenated with the next shard.
+  auto shardsOut = std::make_unique<SmallVector<uint8_t, 0>[]>(numShards);
+  auto shardsAdler = std::make_unique<uint32_t[]>(numShards);
+  parallelForEachN(0, numShards, [&](size_t i) {
+    shardsOut[i] = deflateShard(shardsIn[i], level,
+                                i != numShards - 1 ? Z_SYNC_FLUSH : Z_FINISH);
+    shardsAdler[i] = adler32(1, shardsIn[i].data(), shardsIn[i].size());
+  });
+
+  // Update section size and combine Adler-32 checksums.
+  uint32_t checksum = 1;       // Initial Adler-32 value
+  size = sizeof(Elf_Chdr) + 2; // Elf_Chdr and zlib header
+  for (size_t i = 0; i != numShards; ++i) {
+    size += shardsOut[i].size();
+    checksum = adler32_combine(checksum, shardsAdler[i], shardsIn[i].size());
+  }
+  size += 4; // checksum
 
-  // Update section headers.
-  size = sizeof(Elf_Chdr) + compressedData.size();
+  compressed.shards = std::move(shardsOut);
+  compressed.numShards = numShards;
+  compressed.checksum = checksum;
   flags |= SHF_COMPRESSED;
+#endif
 }
 
 static void writeInt(uint8_t *buf, uint64_t data, uint64_t size) {
@@ -339,10 +403,25 @@
   // If --compress-debug-section is specified and if this is a debug section,
   // we've already compressed section contents. If that's the case,
   // just write it down.
-  if (!compressedData.empty()) {
+  if (compressed.shards) {
     memcpy(buf, zDebugHeader.data(), zDebugHeader.size());
-    memcpy(buf + zDebugHeader.size(), compressedData.data(),
-           compressedData.size());
+    buf += zDebugHeader.size();
+    size -= zDebugHeader.size();
+
+    // Compute shard offsets.
+    auto offsets = std::make_unique<size_t[]>(compressed.numShards);
+    offsets[0] = 2; // zlib header
+    for (size_t i = 1; i != compressed.numShards; ++i)
+      offsets[i] = offsets[i - 1] + compressed.shards[i - 1].size();
+
+    buf[0] = 0x78; // CMF
+    buf[1] = 0x01; // FLG: best speed
+    parallelForEachN(0, compressed.numShards, [&](size_t i) {
+      memcpy(buf + offsets[i], compressed.shards[i].data(),
+             compressed.shards[i].size());
+    });
+
+    write32be(buf + size - 4, compressed.checksum);
     return;
   }
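
Not part of the patch: the standalone sketch below, written against plain zlib and the standard library rather than LLVM's SmallVector and parallelForEachN, illustrates why the stitching performed above is legal. Each shard is compressed as a raw deflate stream (windowBits=-15); Z_SYNC_FLUSH ends every shard but the last on a byte boundary without a final block, so the shards can be concatenated behind a hand-written 2-byte zlib header (0x78 0x01) and terminated with the combined Adler-32 of the whole input. The 4 MiB dummy input, the shard size, and the helper are illustrative only. Build with: c++ demo.cpp -lz

// Standalone sketch (not part of the patch): per-shard raw deflate streams,
// joined behind a hand-written zlib header and followed by the combined
// Adler-32, form one valid zlib stream that zlib can decompress in one shot.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>
#include <zlib.h>

// Compress one shard as a raw deflate stream (windowBits=-15: no zlib header
// or trailer). Z_SYNC_FLUSH ends the shard on a byte boundary without a final
// block; Z_FINISH marks the last shard.
static std::vector<uint8_t> deflateShard(const uint8_t *in, size_t size,
                                         int level, int flush) {
  z_stream s = {};
  deflateInit2(&s, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
  s.next_in = const_cast<uint8_t *>(in);
  s.avail_in = size;
  std::vector<uint8_t> out(deflateBound(&s, size) + 16); // slack for the flush
  s.next_out = out.data();
  s.avail_out = out.size();
  (void)deflate(&s, flush); // one call suffices: all input, ample output space
  assert(s.avail_in == 0);
  out.resize(out.size() - s.avail_out);
  deflateEnd(&s);
  return out;
}

int main() {
  std::string data(1 << 22, 'x'); // dummy input
  const size_t shardSize = 1 << 20;
  const uint8_t *p = reinterpret_cast<const uint8_t *>(data.data());

  std::vector<uint8_t> stream = {0x78, 0x01}; // CMF, FLG (fastest)
  uint32_t checksum = 1;                      // initial Adler-32 value
  for (size_t start = 0; start < data.size(); start += shardSize) {
    size_t n = std::min(shardSize, data.size() - start);
    bool last = start + n == data.size();
    std::vector<uint8_t> shard = deflateShard(
        p + start, n, Z_BEST_SPEED, last ? Z_FINISH : Z_SYNC_FLUSH);
    stream.insert(stream.end(), shard.begin(), shard.end());
    // Per-shard checksums are independent (computed in parallel in the patch)
    // and are folded together afterwards.
    checksum = adler32_combine(checksum, adler32(1, p + start, n), n);
  }
  for (int i = 3; i >= 0; --i) // trailer: big-endian Adler-32 of all input
    stream.push_back((checksum >> (8 * i)) & 0xff);

  // zlib's one-shot decompressor accepts the stitched stream.
  std::vector<uint8_t> out(data.size());
  uLongf outLen = out.size();
  int rc = uncompress(out.data(), &outLen, stream.data(), stream.size());
  assert(rc == Z_OK && outLen == data.size() &&
         std::memcmp(out.data(), data.data(), data.size()) == 0);
  (void)rc;
  return 0;
}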