diff --git a/lld/test/wasm/Inputs/merge-string-debug2.s b/lld/test/wasm/Inputs/merge-string-debug2.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/Inputs/merge-string-debug2.s @@ -0,0 +1,4 @@ +.section .debug_str,"S",@ + .asciz "clang version 13.0.0" + .asciz "bar" + .asciz "foo" diff --git a/lld/test/wasm/merge-string-debug.s b/lld/test/wasm/merge-string-debug.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/merge-string-debug.s @@ -0,0 +1,22 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/merge-string-debug2.s -o %t2.o + +# RUN: wasm-ld %t.o %t2.o -o %t.wasm --no-entry +# RUN: llvm-readobj -x .debug_str %t.wasm | FileCheck %s --check-prefixes CHECK,CHECK-O1 + +# RUN: wasm-ld -O0 %t.o %t2.o -o %tO0.wasm --no-entry +# RUN: llvm-readobj -x .debug_str %tO0.wasm | FileCheck %s --check-prefixes CHECK,CHECK-O0 + +.section .debug_str,"S",@ + .asciz "clang version 13.0.0" + .asciz "foobar" + +# CHECK: Hex dump of section '.debug_str': + +# CHECK-O0: 0x00000000 636c616e 67207665 7273696f 6e203133 clang version 13 +# CHECK-O0: 0x00000010 2e302e30 00666f6f 62617200 636c616e .0.0.foobar.clan +# CHECK-O0: 0x00000020 67207665 7273696f 6e203133 2e302e30 g version 13.0.0 +# CHECK-O0: 0x00000030 00626172 00666f6f 00 .bar.foo. + +# CHECK-O1: 0x00000000 666f6f62 61720066 6f6f0063 6c616e67 foobar.foo.clang +# CHECK-O1: 0x00000010 20766572 73696f6e 2031332e 302e3000 version 13.0.0. 
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -804,6 +804,10 @@ if (auto *s = dyn_cast(seg)) s->splitIntoPieces(); } + for (InputSection *sec : file->customSections) { + if (auto *s = dyn_cast(sec)) + s->splitIntoPieces(); + } }); } diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h --- a/lld/wasm/InputChunks.h +++ b/lld/wasm/InputChunks.h @@ -43,7 +43,11 @@ MergedSegment, Function, SyntheticFunction, - Section + Section, + // TODO(sbc): Switch debug info to use data segments so we can eliminate + // the duplication here with Merge+MergedSegment. + MergeSection, + MergedSection, }; Kind kind() const { return sectionKind; } @@ -156,6 +160,7 @@ }; class SyntheticMergedDataSegment; +class SyntheticMergedInputSection; // Merge segment handling copied from lld/ELF/InputSection.h. Keep in sync // where possible. @@ -232,7 +237,7 @@ return c->kind() == InputChunk::MergedSegment; } - uint32_t getSize() const override; + uint32_t getSize() const override { return builder.getSize(); } StringRef getName() const override { return name; } @@ -349,17 +354,19 @@ // Represents a single Wasm Section within an input file. 
class InputSection : public InputChunk { public: - InputSection(const WasmSection &s, ObjFile *f) - : InputChunk(f, InputChunk::Section), section(s), tombstoneValue(getTombstoneForSection(s.Name)) { - assert(section.Type == llvm::wasm::WASM_SEC_CUSTOM); + InputSection(StringRef name, const WasmSection *s, ObjFile *f) + : InputChunk(f, InputChunk::Section), section(s), + tombstoneValue(getTombstoneForSection(name)) { + assert(!section || section->Type == llvm::wasm::WASM_SEC_CUSTOM); } - StringRef getName() const override { return section.Name; } + StringRef getName() const override { return section->Name; } StringRef getDebugName() const override { return StringRef(); } - uint32_t getComdat() const override { return section.Comdat; } + uint32_t getComdat() const override { return section->Comdat; } + uint64_t getOffset(uint64_t offset) const; protected: - ArrayRef data() const override { return section.Content; } + ArrayRef data() const override { return section->Content; } // Offset within the input section. This is only zero since this chunk // type represents an entire input section, not part of one. @@ -367,10 +374,85 @@ uint64_t getTombstone() const override { return tombstoneValue; } static uint64_t getTombstoneForSection(StringRef name); - const WasmSection &section; + const WasmSection *section; const uint64_t tombstoneValue; }; +// Similar to MergeInputSegment but for custom sections rather +// than data segments. +class MergeInputSection : public InputSection { +public: + MergeInputSection(StringRef name, const WasmSection *s, ObjFile *f) + : InputSection(name, s, f) { + sectionKind = MergeSection; + } + + static bool classof(const InputChunk *s) { return s->kind() == MergeSection; } + void splitIntoPieces(); + + // Translate an offset in the input section to an offset in the parent + // SyntheticMergedInputSection. 
+ uint64_t getParentOffset(uint64_t offset) const; + + // Splittable sections are handled as a sequence of data + // rather than a single large blob of data. + std::vector pieces; + + // Returns I'th piece's data. This function is very hot when + // string merging is enabled, so we want to inline. + LLVM_ATTRIBUTE_ALWAYS_INLINE + llvm::CachedHashStringRef getData(size_t i) const { + size_t begin = pieces[i].inputOff; + size_t end = + (pieces.size() - 1 == i) ? data().size() : pieces[i + 1].inputOff; + return {toStringRef(data().slice(begin, end - begin)), pieces[i].hash}; + } + + SegmentPiece *getSectionPiece(uint64_t offset); + const SegmentPiece *getSectionPiece(uint64_t offset) const { + return const_cast(this)->getSectionPiece(offset); + } + + SyntheticMergedInputSection *parent = nullptr; + +private: + void splitStrings(ArrayRef a); +}; + +// Similar to SyntheticMergedDataSegment but for custom sections rather +// than data segments. +class SyntheticMergedInputSection : public InputSection { +public: + SyntheticMergedInputSection(StringRef name) + : InputSection(name, nullptr, nullptr), name(name), + builder(llvm::StringTableBuilder::RAW, 1ULL) { + sectionKind = MergedSection; + } + static bool classof(const InputChunk *s) { + return s->kind() == MergedSection; + } + + StringRef getName() const override { return name; } + + uint32_t getComdat() const override { return 0; } + + uint32_t getSize() const override { return builder.getSize(); } + + void writeTo(uint8_t *buf) const override; + + void finalizeContents(); + + void addMergeSection(MergeInputSection *ms) { + ms->parent = this; + sections.push_back(ms); + } + +protected: + std::vector sections; + StringRef name; + llvm::StringTableBuilder builder; +}; + } // namespace wasm std::string toString(const wasm::InputChunk *); diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp --- a/lld/wasm/InputChunks.cpp +++ b/lld/wasm/InputChunks.cpp @@ -436,10 +436,6 @@ return piece->outputOff + addend; } 
-uint32_t SyntheticMergedDataSegment::getSize() const { - return builder.getSize(); -} - void SyntheticMergedDataSegment::writeTo(uint8_t *buf) const { builder.write(buf + outSecOff); @@ -482,5 +478,87 @@ return UINT64_C(-1); } +uint64_t InputSection::getOffset(uint64_t offset) const { + if (const MergeInputSection *ms = dyn_cast(this)) { + LLVM_DEBUG(dbgs() << "getOffset(merged): " << getName() << "\n"); + LLVM_DEBUG(dbgs() << "offset: " << offset << "\n"); + LLVM_DEBUG(dbgs() << "parentOffset: " << ms->getParentOffset(offset) + << "\n"); + assert(ms->parent); + return ms->parent->getOffset(ms->getParentOffset(offset)); + } + return outSecOff + offset; +} + +void MergeInputSection::splitStrings(ArrayRef data) { + LLVM_DEBUG(llvm::dbgs() << "splitStrings\n"); + size_t off = 0; + StringRef s = toStringRef(data); + + while (!s.empty()) { + size_t end = s.find(0); + if (end == StringRef::npos) + fatal(toString(this) + ": string is not null terminated"); + size_t size = end + 1; + + pieces.emplace_back(off, xxHash64(s.substr(0, size)), true); + s = s.substr(size); + off += size; + } +} + +void MergeInputSection::splitIntoPieces() { + assert(pieces.empty()); + splitStrings(data()); +} + +SegmentPiece *MergeInputSection::getSectionPiece(uint64_t offset) { + if (this->data().size() <= offset) + fatal(toString(this) + ": offset is outside the section"); + + // If Offset is not at beginning of a section piece, it is not in the map. + // In that case we need to do a binary search of the original section piece + // vector. + auto it = partition_point( + pieces, [=](SegmentPiece p) { return p.inputOff <= offset; }); + return &it[-1]; +} + +uint64_t MergeInputSection::getParentOffset(uint64_t offset) const { + // If Offset is not at beginning of a section piece, it is not in the map. + // In that case we need to search from the original section piece vector. 
+ const SegmentPiece *piece = getSectionPiece(offset); + uint64_t addend = offset - piece->inputOff; + return piece->outputOff + addend; +} + +void SyntheticMergedInputSection::writeTo(uint8_t *buf) const { + builder.write(buf + outSecOff); + + // Apply relocations + relocate(buf + outSecOff); +} + +void SyntheticMergedInputSection::finalizeContents() { + // Add all string pieces to the string table builder to create section + // contents. + for (MergeInputSection *sec : sections) { + for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) + if (sec->pieces[i].live) + builder.add(sec->getData(i)); + } + + // Fix the string table content. After this, the contents will never change. + builder.finalize(); + + // finalize() fixed tail-optimized strings, so we can now get + // offsets of strings. Get an offset for each string and save it + // to a corresponding SegmentPiece for easy access. + for (MergeInputSection *sec : sections) + for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) + if (sec->pieces[i].live) + sec->pieces[i].outputOff = builder.getOffset(sec->getData(i)); +} + } // namespace wasm } // namespace lld diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -360,6 +360,14 @@ config->legacyFunctionTable = true; } +static bool shouldMerge(const WasmSection &sec) { + if (config->optimize == 0) + return false; + // Sadly we don't have section attributes yet for custom sections, so we + // match exact names; a prefix test would also catch .debug_str_offsets. + return sec.Name == ".debug_str" || sec.Name == ".debug_str.dwo"; +} + static bool shouldMerge(const WasmSegment &seg) { // As of now we only support merging strings, and only with single byte // alignment (2^0). 
@@ -445,7 +453,11 @@ assert(!dataSection); dataSection = &section; } else if (section.Type == WASM_SEC_CUSTOM) { - auto *customSec = make(section, this); + InputSection *customSec; + if (shouldMerge(section)) + customSec = make(section.Name, &section, this); + else + customSec = make(section.Name, &section, this); customSec->discarded = isExcludedByComdat(customSec); customSections.emplace_back(customSec); customSections.back()->setRelocations(section.Relocations); diff --git a/lld/wasm/OutputSections.h b/lld/wasm/OutputSections.h --- a/lld/wasm/OutputSections.h +++ b/lld/wasm/OutputSections.h @@ -128,8 +128,9 @@ void finalizeContents() override; protected: + void finalizeInputSections(); size_t payloadSize = 0; - ArrayRef inputSections; + std::vector inputSections; std::string nameData; }; diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp --- a/lld/wasm/OutputSections.cpp +++ b/lld/wasm/OutputSections.cpp @@ -12,6 +12,7 @@ #include "OutputSegment.h" #include "WriterUtils.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/Parallel.h" @@ -234,7 +235,39 @@ return false; } +// Lots of duplication here with OutputSegment::finalizeInputSegments +void CustomSection::finalizeInputSections() { + SyntheticMergedInputSection *mergedSection = nullptr; + std::vector newSections; + + for (InputSection *s : inputSections) { + MergeInputSection *ms = dyn_cast(s); + if (!ms) { + newSections.push_back(s); + continue; + } + + // A section should not make it here unless it's alive + // assert(ms->live); + + if (!mergedSection) { + mergedSection = make(name); + // syn->outputSec = this; + newSections.push_back(mergedSection); + } + mergedSection->addMergeSection(ms); + } + + if (!mergedSection) + return; + + mergedSection->finalizeContents(); + inputSections = newSections; +} + void CustomSection::finalizeContents() { + finalizeInputSections(); + raw_string_ostream 
os(nameData); encodeULEB128(name.size(), os); os << name; diff --git a/lld/wasm/OutputSegment.cpp b/lld/wasm/OutputSegment.cpp --- a/lld/wasm/OutputSegment.cpp +++ b/lld/wasm/OutputSegment.cpp @@ -56,16 +56,16 @@ return seg->flags == ms->flags && seg->alignment == ms->alignment; }); if (i == mergedSegments.end()) { - LLVM_DEBUG(llvm::dbgs() << "new merge section: " << name + LLVM_DEBUG(llvm::dbgs() << "new merge segment: " << name << " alignment=" << ms->alignment << "\n"); - SyntheticMergedDataSegment *syn = + auto *syn = make(name, ms->alignment, ms->flags); syn->outputSeg = this; mergedSegments.push_back(syn); i = std::prev(mergedSegments.end()); newSegments.push_back(syn); } else { - LLVM_DEBUG(llvm::dbgs() << "adding to merge section: " << name << "\n"); + LLVM_DEBUG(llvm::dbgs() << "adding to merge segment: " << name << "\n"); } (*i)->addMergeSegment(ms); } diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -19,6 +19,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCAsmMacro.h" #include "llvm/MC/MCDwarf.h" @@ -610,8 +611,9 @@ getAssociativeCOFFSection(MCSectionCOFF *Sec, const MCSymbol *KeySym, unsigned UniqueID = GenericSectionID); - MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K) { - return getWasmSection(Section, K, 0, nullptr); + MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, + unsigned Flags = 0) { + return getWasmSection(Section, K, Flags, nullptr); } MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -791,9 +791,10 @@ DwarfLineSection = Ctx->getWasmSection(".debug_line", 
SectionKind::getMetadata()); DwarfLineStrSection = - Ctx->getWasmSection(".debug_line_str", SectionKind::getMetadata()); - DwarfStrSection = - Ctx->getWasmSection(".debug_str", SectionKind::getMetadata()); + Ctx->getWasmSection(".debug_line_str", SectionKind::getMetadata(), + wasm::WASM_SEG_FLAG_STRINGS); + DwarfStrSection = Ctx->getWasmSection( + ".debug_str", SectionKind::getMetadata(), wasm::WASM_SEG_FLAG_STRINGS); DwarfLocSection = Ctx->getWasmSection(".debug_loc", SectionKind::getMetadata()); DwarfAbbrevSection = @@ -836,7 +837,8 @@ DwarfAbbrevDWOSection = Ctx->getWasmSection(".debug_abbrev.dwo", SectionKind::getMetadata()); DwarfStrDWOSection = - Ctx->getWasmSection(".debug_str.dwo", SectionKind::getMetadata()); + Ctx->getWasmSection(".debug_str.dwo", SectionKind::getMetadata(), + wasm::WASM_SEG_FLAG_STRINGS); DwarfLineDWOSection = Ctx->getWasmSection(".debug_line.dwo", SectionKind::getMetadata()); DwarfLocDWOSection =