Please use GitHub pull requests for new patches. Phabricator shutdown timeline
Changeset View
Standalone View
lld/ELF/OutputSections.cpp
Show All 9 Lines | |||||
#include "Config.h" | #include "Config.h" | ||||
#include "LinkerScript.h" | #include "LinkerScript.h" | ||||
#include "SymbolTable.h" | #include "SymbolTable.h" | ||||
#include "SyntheticSections.h" | #include "SyntheticSections.h" | ||||
#include "Target.h" | #include "Target.h" | ||||
#include "lld/Common/Memory.h" | #include "lld/Common/Memory.h" | ||||
#include "lld/Common/Strings.h" | #include "lld/Common/Strings.h" | ||||
#include "llvm/BinaryFormat/Dwarf.h" | #include "llvm/BinaryFormat/Dwarf.h" | ||||
#include "llvm/Support/Compression.h" | #include "llvm/Config/config.h" // LLVM_ENABLE_ZLIB | ||||
mgorny: This breaks the build against installed LLVM since `config.h` is a private header. I guess you're looking to add a new constant to `llvm-config.h`. | |||||
#include "llvm/Support/MD5.h" | #include "llvm/Support/MD5.h" | ||||
#include "llvm/Support/MathExtras.h" | #include "llvm/Support/MathExtras.h" | ||||
#include "llvm/Support/Parallel.h" | #include "llvm/Support/Parallel.h" | ||||
#include "llvm/Support/SHA1.h" | #include "llvm/Support/SHA1.h" | ||||
#include "llvm/Support/TimeProfiler.h" | #include "llvm/Support/TimeProfiler.h" | ||||
#include <regex> | #include <regex> | ||||
#include <unordered_set> | #include <unordered_set> | ||||
#if LLVM_ENABLE_ZLIB | |||||
#include <zlib.h> | |||||
#endif | |||||
using namespace llvm; | using namespace llvm; | ||||
using namespace llvm::dwarf; | using namespace llvm::dwarf; | ||||
using namespace llvm::object; | using namespace llvm::object; | ||||
using namespace llvm::support::endian; | using namespace llvm::support::endian; | ||||
using namespace llvm::ELF; | using namespace llvm::ELF; | ||||
using namespace lld; | using namespace lld; | ||||
using namespace lld::elf; | using namespace lld::elf; | ||||
▲ Show 20 Lines • Show All 245 Lines • ▼ Show 20 Lines | |||||
static void fill(uint8_t *buf, size_t size, | static void fill(uint8_t *buf, size_t size, | ||||
const std::array<uint8_t, 4> &filler) { | const std::array<uint8_t, 4> &filler) { | ||||
size_t i = 0; | size_t i = 0; | ||||
for (; i + 4 < size; i += 4) | for (; i + 4 < size; i += 4) | ||||
memcpy(buf + i, filler.data(), 4); | memcpy(buf + i, filler.data(), 4); | ||||
memcpy(buf + i, filler.data(), size - i); | memcpy(buf + i, filler.data(), size - i); | ||||
} | } | ||||
#if LLVM_ENABLE_ZLIB | |||||
static SmallVector<uint8_t, 0> deflateShard(ArrayRef<uint8_t> in, int level, | |||||
[Not Done] [Reply] [Inline Actions] I'm wondering if you have considered using llvm/Support/Compression.h alexander-shaposhnikov: I'm wondering if you have considered using llvm/Support/Compression.h
(the implementation there… | |||||
The code is largely lld/ELF specific. If I add the code to llvm/Support/Compression.h, LLVMSupport will get bloated. Technically llvm-objcopy --compress-debug-sections can use the code as well but the two projects may have different tweaks and sharing code won't help much in my opinion. MaskRay: The code is largely lld/ELF specific. If I add the code to llvm/Support/Compression.h… | |||||
[Not Done] [Reply] [Inline Actions] Just in case - after looking at https://zlib.net/manual.html and https://llvm.org/doxygen/Compression_8cpp_source.html - P.S. Compression.h contains wrappers around compress2, but what's going on here is a bit different, … alexander-shaposhnikov: Just in case - after looking at https://zlib.net/manual.html and https://llvm.org/doxygen/Compression_8cpp_source.html… | |||||
int flush) { | |||||
// 15 and 8 are default. windowBits=-15 is negative to generate raw deflate | |||||
// data with no zlib header or trailer. | |||||
z_stream s = {}; | |||||
deflateInit2(&s, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); | |||||
s.next_in = const_cast<uint8_t *>(in.data()); | |||||
s.avail_in = in.size(); | |||||
// Allocate a buffer of half of the input size, and grow it by 1.5x if | |||||
// insufficient. | |||||
Typo // Allocate a buffer peter.smith: Typo // Allocate a buffer | |||||
SmallVector<uint8_t, 0> out; | |||||
size_t pos = 0; | |||||
out.resize_for_overwrite(std::max<size_t>(in.size() / 2, 64)); | |||||
do { | |||||
if (pos == out.size()) | |||||
out.resize_for_overwrite(out.size() * 3 / 2); | |||||
s.next_out = out.data() + pos; | |||||
s.avail_out = out.size() - pos; | |||||
(void)deflate(&s, flush); | |||||
pos = s.next_out - out.data(); | |||||
} while (s.avail_out == 0); | |||||
assert(s.avail_in == 0); | |||||
out.truncate(pos); | |||||
deflateEnd(&s); | |||||
return out; | |||||
} | |||||
#endif | |||||
// Compress section contents if this section contains debug info. | // Compress section contents if this section contains debug info. | ||||
template <class ELFT> void OutputSection::maybeCompress() { | template <class ELFT> void OutputSection::maybeCompress() { | ||||
#if LLVM_ENABLE_ZLIB | |||||
using Elf_Chdr = typename ELFT::Chdr; | using Elf_Chdr = typename ELFT::Chdr; | ||||
// Compress only DWARF debug sections. | // Compress only DWARF debug sections. | ||||
if (!config->compressDebugSections || (flags & SHF_ALLOC) || | if (!config->compressDebugSections || (flags & SHF_ALLOC) || | ||||
!name.startswith(".debug_")) | !name.startswith(".debug_") || size == 0) | ||||
return; | return; | ||||
llvm::TimeTraceScope timeScope("Compress debug sections"); | llvm::TimeTraceScope timeScope("Compress debug sections"); | ||||
// Create a section header. | // Create a section header. | ||||
zDebugHeader.resize(sizeof(Elf_Chdr)); | zDebugHeader.resize(sizeof(Elf_Chdr)); | ||||
auto *hdr = reinterpret_cast<Elf_Chdr *>(zDebugHeader.data()); | auto *hdr = reinterpret_cast<Elf_Chdr *>(zDebugHeader.data()); | ||||
hdr->ch_type = ELFCOMPRESS_ZLIB; | hdr->ch_type = ELFCOMPRESS_ZLIB; | ||||
hdr->ch_size = size; | hdr->ch_size = size; | ||||
hdr->ch_addralign = alignment; | hdr->ch_addralign = alignment; | ||||
// Write section contents to a temporary buffer and compress it. | // Write section contents to a temporary buffer and compress it. | ||||
std::vector<uint8_t> buf(size); | std::vector<uint8_t> buf(size); | ||||
This zero fills the buffer, but I have tested that removing it and adding gap filling in writeTo does not improve performance. MaskRay: This zero fills the buffer, but I have tested that removing it and adding gap filling in… | |||||
writeTo<ELFT>(buf.data()); | writeTo<ELFT>(buf.data()); | ||||
// We chose 1 as the default compression level because it is the fastest. If | // We chose 1 as the default compression level because it is the fastest. If | ||||
[Not Done] [Reply] [Inline Actions] Maybe mention Z_BEST_SPEED instead of just 1? ikudrin: Maybe mention `Z_BEST_SPEED` instead of just `1`? | |||||
// -O2 is given, we use level 6 to compress debug info more by ~15%. We found | // -O2 is given, we use level 6 to compress debug info more by ~15%. We found | ||||
// that level 7 to 9 doesn't make much difference (~1% more compression) while | // that level 7 to 9 doesn't make much difference (~1% more compression) while | ||||
// they take significant amount of time (~2x), so level 6 seems enough. | // they take significant amount of time (~2x), so level 6 seems enough. | ||||
if (Error e = zlib::compress(toStringRef(buf), compressedData, | const int level = config->optimize >= 2 ? 6 : Z_BEST_SPEED; | ||||
config->optimize >= 2 ? 6 : 1)) | |||||
fatal("compress failed: " + llvm::toString(std::move(e))); | // Split input into 1-MiB shards. | ||||
constexpr size_t shardSize = 1 << 20; | |||||
const size_t numShards = (size + shardSize - 1) / shardSize; | |||||
auto shardsIn = std::make_unique<ArrayRef<uint8_t>[]>(numShards); | |||||
for (size_t i = 0, start = 0, end; start != buf.size(); ++i, start = end) { | |||||
end = std::min(start + shardSize, buf.size()); | |||||
shardsIn[i] = makeArrayRef<uint8_t>(buf.data() + start, end - start); | |||||
} | |||||
// Compress shards and compute Adler-32 checksums. Use Z_SYNC_FLUSH for all | |||||
// shards but the last to flush the output to a byte boundary to be | |||||
// concatenated with the next shard. | |||||
auto shardsOut = std::make_unique<SmallVector<uint8_t, 0>[]>(numShards); | |||||
auto shardsAdler = std::make_unique<uint32_t[]>(numShards); | |||||
parallelForEachN(0, numShards, [&](size_t i) { | |||||
shardsOut[i] = deflateShard(shardsIn[i], level, | |||||
i != numShards - 1 ? Z_SYNC_FLUSH : Z_FINISH); | |||||
shardsAdler[i] = adler32(1, shardsIn[i].data(), shardsIn[i].size()); | |||||
}); | |||||
// Update section headers. | // Update section size and combine Adler-32 checksums. | ||||
size = sizeof(Elf_Chdr) + compressedData.size(); | uint32_t checksum = 1; // Initial Adler-32 value | ||||
size = sizeof(Elf_Chdr) + 2; // Elf_Chdr and zlib header | |||||
for (size_t i = 0; i != numShards; ++i) { | |||||
size += shardsOut[i].size(); | |||||
checksum = adler32_combine(checksum, shardsAdler[i], shardsIn[i].size()); | |||||
} | |||||
size += 4; // checksum | |||||
compressed.shards = std::move(shardsOut); | |||||
compressed.numShards = numShards; | |||||
compressed.checksum = checksum; | |||||
flags |= SHF_COMPRESSED; | flags |= SHF_COMPRESSED; | ||||
Is it worth picking a plural as there can be more than one shard? Similarly for out and adler. For example ins, outs and adlers. I'm not sure ins and outs sound right though, perhaps shardsIn and shardsOut. Again not a strong opinion. peter.smith: Is it worth picking a plural as there can be more than one shard? Similarly for out and adler. | |||||
#endif | |||||
Might be worth using start and end rather than i and j? I've not got a strong opinion here, happy to keep with i, j if you prefer. peter.smith: Might be worth using start and end rather than i and j? I've not got a strong opinion here… | |||||
} | } | ||||
static void writeInt(uint8_t *buf, uint64_t data, uint64_t size) { | static void writeInt(uint8_t *buf, uint64_t data, uint64_t size) { | ||||
if (size == 1) | if (size == 1) | ||||
*buf = data; | *buf = data; | ||||
else if (size == 2) | else if (size == 2) | ||||
write16(buf, data); | write16(buf, data); | ||||
else if (size == 4) | else if (size == 4) | ||||
The code above uses idx for going through in[] and i for something else, could be worth using the same value? peter.smith: The code above uses idx for going through in[] and i for something else, could be worth using… | |||||
write32(buf, data); | write32(buf, data); | ||||
else if (size == 8) | else if (size == 8) | ||||
write64(buf, data); | write64(buf, data); | ||||
else | else | ||||
llvm_unreachable("unsupported Size argument"); | llvm_unreachable("unsupported Size argument"); | ||||
} | } | ||||
template <class ELFT> void OutputSection::writeTo(uint8_t *buf) { | template <class ELFT> void OutputSection::writeTo(uint8_t *buf) { | ||||
llvm::TimeTraceScope timeScope("Write sections", name); | llvm::TimeTraceScope timeScope("Write sections", name); | ||||
if (type == SHT_NOBITS) | if (type == SHT_NOBITS) | ||||
return; | return; | ||||
// If --compress-debug-sections is specified and if this is a debug section, | // If --compress-debug-sections is specified and if this is a debug section, | ||||
// we've already compressed section contents. If that's the case, | // we've already compressed section contents. If that's the case, | ||||
// just write it down. | // just write it down. | ||||
if (!compressedData.empty()) { | if (compressed.shards) { | ||||
memcpy(buf, zDebugHeader.data(), zDebugHeader.size()); | memcpy(buf, zDebugHeader.data(), zDebugHeader.size()); | ||||
memcpy(buf + zDebugHeader.size(), compressedData.data(), | buf += zDebugHeader.size(); | ||||
compressedData.size()); | size -= zDebugHeader.size(); | ||||
// Compute shard offsets. | |||||
auto offsets = std::make_unique<size_t[]>(compressed.numShards); | |||||
offsets[0] = 2; // zlib header | |||||
for (size_t i = 1; i != compressed.numShards; ++i) | |||||
offsets[i] = offsets[i - 1] + compressed.shards[i - 1].size(); | |||||
buf[0] = 0x78; // CMF | |||||
buf[1] = 0x01; // FLG: best speed | |||||
parallelForEachN(0, compressed.numShards, [&](size_t i) { | |||||
memcpy(buf + offsets[i], compressed.shards[i].data(), | |||||
compressed.shards[i].size()); | |||||
}); | |||||
write32be(buf + size - 4, compressed.checksum); | |||||
return; | return; | ||||
} | } | ||||
// Write leading padding. | // Write leading padding. | ||||
SmallVector<InputSection *, 0> sections = getInputSections(*this); | SmallVector<InputSection *, 0> sections = getInputSections(*this); | ||||
std::array<uint8_t, 4> filler = getFiller(); | std::array<uint8_t, 4> filler = getFiller(); | ||||
bool nonZeroFiller = read32(filler.data()) != 0; | bool nonZeroFiller = read32(filler.data()) != 0; | ||||
if (nonZeroFiller) | if (nonZeroFiller) | ||||
▲ Show 20 Lines • Show All 245 Lines • Show Last 20 Lines |
This breaks the build against installed LLVM since config.h is a private header. I guess you're looking to add a new constant to llvm-config.h.